diff options
author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
commit | 925087356b853e2099c1b60d8b757d7aa02121a9 (patch) | |
tree | 579925c5c9d3da51f43018a5c6d1c4dfbb72b089 /gi/scripts | |
parent | ea79e535d69f6854d01c62e3752971fb6730d8e7 (diff) |
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'gi/scripts')
-rwxr-xr-x | gi/scripts/buck2utf8.pl | 87 |
1 files changed, 0 insertions, 87 deletions
diff --git a/gi/scripts/buck2utf8.pl b/gi/scripts/buck2utf8.pl deleted file mode 100755 index 1acfae8d..00000000 --- a/gi/scripts/buck2utf8.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use utf8; -binmode(STDOUT, ":utf8"); -while(<>) { - chomp; - my @words = split /\s+/; - for my $w (@words) { - $_ = $w; - if ($w =~ /^__NTK__/o) { - s/__NTK__//go; - next if /^$/; - print STDOUT "$_ "; - next; - } -s/tR/\x{0679}/g; # retroflex t -s/dR/\x{0688}/g; # retroflex d -s/rR/\x{0691}/g; # retroflex r -s/p/\x{067E}/g; # peh -s/c/\x{0686}/g; # tcheh -s/g/\x{06AF}/g; # geh (G=ghain) -s/@/\x{06BE}/g; # heh doachashmee -s/h'/\x{06c2}/g; # heh goal + hamza -s/h/\x{06c1}/g; # heh goal -s/J/\x{0698}/g; # zheh (rare, usually persian loan words) -s/k/\x{06A9}/g; # k -s/Y'/\x{06d3}/g; # yeh barree + hamza above (ligature) -s/y/\x{06cc}/g; # same as ya' in arabic -s/Y/\x{06d2}/g; # yeh barree -s/N/\x{06BA}/g; # Ghunna - - s/\'/\x{0621}/g; - s/\|/\x{0622}/g; - s/\>/\x{0623}/g; - s/\&/\x{0624}/g; - s/\</\x{0625}/g; - s/\}/\x{0626}/g; - s/A/\x{0627}/g; - s/b/\x{0628}/g; - s/t/\x{062A}/g; - s/v/\x{062B}/g; - s/j/\x{062C}/g; - s/H/\x{062D}/g; - s/x/\x{062E}/g; - s/d/\x{062F}/g; - s/\*/\x{0630}/g; - s/r/\x{0631}/g; - s/z/\x{0632}/g; - s/s/\x{0633}/g; - s/\$/\x{0634}/g; - s/S/\x{0635}/g; - s/D/\x{0636}/g; - s/T/\x{0637}/g; - s/Z/\x{0638}/g; - s/E/\x{0639}/g; - s/g/\x{063A}/g; - s/_/\x{0640}/g; - s/f/\x{0641}/g; - s/q/\x{0642}/g; - s/k/\x{0643}/g; - s/l/\x{0644}/g; - s/m/\x{0645}/g; - s/n/\x{0646}/g; - s/h/\x{0647}/g; - s/w/\x{0648}/g; - s/Y/\x{0649}/g; - s/y/\x{064A}/g; - s/F/\x{064B}/g; - s/N/\x{064C}/g; - s/K/\x{064D}/g; - s/a/\x{064E}/g; - s/u/\x{064F}/g; - s/i/\x{0650}/g; - s/\~/\x{0651}/g; - s/o/\x{0652}/g; - s/\`/\x{0670}/g; - s/\{/\x{0671}/g; - s/P/\x{067E}/g; - s/J/\x{0686}/g; - s/V/\x{06A4}/g; - s/G/\x{06AF}/g; - - -print STDOUT "$_ "; - } - print STDOUT "\n"; -} |