summary | refs | log | tree | commit | diff
path: root/gi/scripts/buck2utf8.pl
blob: 1acfae8d0ee4f615fd0cd5bca8d80cd17b0a3fbe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/perl -w
use strict;
use utf8;
binmode(STDOUT, ":utf8");
# Transliterate Buckwalter-style romanized Urdu/Arabic text to Unicode.
#
# Input is read line by line via <> (files named on the command line, or
# STDIN).  Each line is split on whitespace and every token is converted
# independently.  Each emitted token is followed by a single space, and each
# input line is terminated with a newline on STDOUT.
#
# Tokens that begin with the marker __NTK__ are not transliterated: every
# occurrence of the marker is removed and the remainder is printed verbatim
# (nothing is printed for the token if only the marker was present).

# Romanization => Unicode character.
#
# The Urdu entries take precedence over the plain Buckwalter (Arabic) values
# for g, h, k, y, Y, N and J, so only the Urdu target is listed for those
# keys.  Two-character keys are matched before their single-character
# components (see $buckwalter_re below).
my %utf8_for = (
    # --- Urdu extensions ---
    'tR' => "\x{0679}",    # retroflex t (tteh)
    'dR' => "\x{0688}",    # retroflex d (ddal)
    'rR' => "\x{0691}",    # retroflex r (rreh)
    'p'  => "\x{067E}",    # peh
    'c'  => "\x{0686}",    # tcheh
    'g'  => "\x{06AF}",    # gaf (ghain is written G)
    'G'  => "\x{063A}",    # ghain
    '@'  => "\x{06BE}",    # heh doachashmee
    "h'" => "\x{06C2}",    # heh goal + hamza above
    'h'  => "\x{06C1}",    # heh goal
    'J'  => "\x{0698}",    # zheh (rare, usually Persian loan words)
    'k'  => "\x{06A9}",    # keheh
    "Y'" => "\x{06D3}",    # yeh barree + hamza above (ligature)
    'y'  => "\x{06CC}",    # Farsi yeh
    'Y'  => "\x{06D2}",    # yeh barree
    'N'  => "\x{06BA}",    # noon ghunna

    # --- Buckwalter (Arabic) letters ---
    "'"  => "\x{0621}",    # hamza
    '|'  => "\x{0622}",    # alef with madda above
    '>'  => "\x{0623}",    # alef with hamza above
    '&'  => "\x{0624}",    # waw with hamza above
    '<'  => "\x{0625}",    # alef with hamza below
    '}'  => "\x{0626}",    # yeh with hamza above
    'A'  => "\x{0627}",    # alef
    'b'  => "\x{0628}",    # beh
    't'  => "\x{062A}",    # teh
    'v'  => "\x{062B}",    # theh
    'j'  => "\x{062C}",    # jeem
    'H'  => "\x{062D}",    # hah
    'x'  => "\x{062E}",    # khah
    'd'  => "\x{062F}",    # dal
    '*'  => "\x{0630}",    # thal
    'r'  => "\x{0631}",    # reh
    'z'  => "\x{0632}",    # zain
    's'  => "\x{0633}",    # seen
    '$'  => "\x{0634}",    # sheen
    'S'  => "\x{0635}",    # sad
    'D'  => "\x{0636}",    # dad
    'T'  => "\x{0637}",    # tah
    'Z'  => "\x{0638}",    # zah
    'E'  => "\x{0639}",    # ain
    '_'  => "\x{0640}",    # tatweel
    'f'  => "\x{0641}",    # feh
    'q'  => "\x{0642}",    # qaf
    'l'  => "\x{0644}",    # lam
    'm'  => "\x{0645}",    # meem
    'n'  => "\x{0646}",    # noon
    'w'  => "\x{0648}",    # waw

    # --- Diacritics and remaining extras ---
    'F'  => "\x{064B}",    # fathatan
    'K'  => "\x{064D}",    # kasratan
    'a'  => "\x{064E}",    # fatha
    'u'  => "\x{064F}",    # damma
    'i'  => "\x{0650}",    # kasra
    '~'  => "\x{0651}",    # shadda
    'o'  => "\x{0652}",    # sukun
    '`'  => "\x{0670}",    # superscript alef
    '{'  => "\x{0671}",    # alef wasla
    'P'  => "\x{067E}",    # peh (alternative spelling of p)
    'V'  => "\x{06A4}",    # veh
);

# One alternation over every key, longest keys first, so that digraphs such
# as "tR" or "h'" win over their single-character prefixes.
my $buckwalter_re = do {
    my $alternatives = join '|',
        map  { quotemeta }
        sort { length($b) <=> length($a) || $a cmp $b }
        keys %utf8_for;
    qr/($alternatives)/;
};

# Return $token with every romanized letter replaced by its Unicode
# counterpart; characters without a table entry are left unchanged.
sub transliterate_token {
    my ($token) = @_;
    $token =~ s/$buckwalter_re/$utf8_for{$1}/g;
    return $token;
}

while (my $line = <>) {
    chomp $line;
    for my $token (split /\s+/, $line) {
        if ($token =~ /^__NTK__/) {
            # Pass-through token: drop the marker(s), keep the rest as is.
            (my $verbatim = $token) =~ s/__NTK__//g;
            next if $verbatim eq '';
            print STDOUT "$verbatim ";
            next;
        }
        print STDOUT transliterate_token($token), ' ';
    }
    print STDOUT "\n";
}