From 925087356b853e2099c1b60d8b757d7aa02121a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/scripts/buck2utf8.pl | 87 ------------------------------------------------- 1 file changed, 87 deletions(-) delete mode 100755 gi/scripts/buck2utf8.pl (limited to 'gi/scripts') diff --git a/gi/scripts/buck2utf8.pl b/gi/scripts/buck2utf8.pl deleted file mode 100755 index 1acfae8d..00000000 --- a/gi/scripts/buck2utf8.pl +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -use strict; -use utf8; -binmode(STDOUT, ":utf8"); -while(<>) { - chomp; - my @words = split /\s+/; - for my $w (@words) { - $_ = $w; - if ($w =~ /^__NTK__/o) { - s/__NTK__//go; - next if /^$/; - print STDOUT "$_ "; - next; - } -s/tR/\x{0679}/g; # retroflex t -s/dR/\x{0688}/g; # retroflex d -s/rR/\x{0691}/g; # retroflex r -s/p/\x{067E}/g; # peh -s/c/\x{0686}/g; # tcheh -s/g/\x{06AF}/g; # geh (G=ghain) -s/@/\x{06BE}/g; # heh doachashmee -s/h'/\x{06c2}/g; # heh goal + hamza -s/h/\x{06c1}/g; # heh goal -s/J/\x{0698}/g; # zheh (rare, usually persian loan words) -s/k/\x{06A9}/g; # k -s/Y'/\x{06d3}/g; # yeh barree + hamza above (ligature) -s/y/\x{06cc}/g; # same as ya' in arabic -s/Y/\x{06d2}/g; # yeh barree -s/N/\x{06BA}/g; # Ghunna - - s/\'/\x{0621}/g; - s/\|/\x{0622}/g; - s/\>/\x{0623}/g; - s/\&/\x{0624}/g; - s/\