summary refs log tree commit diff
path: root/word-aligner
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-03-11 17:06:38 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-03-11 17:06:38 -0400
commit 33e25664131ddd9507b1c9eef2fb238e1e8840a1 (patch)
tree 4902fa1e0666e53c6783c1affb7a723a507f8d30 /word-aligner
parent bd65d6a4492e172a7840c010c5414ceb6f6acd56 (diff)
russian ortho norm
Diffstat (limited to 'word-aligner')
-rwxr-xr-x word-aligner/ortho-norm/ru.pl 44
1 files changed, 44 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/ru.pl b/word-aligner/ortho-norm/ru.pl
new file mode 100755
index 00000000..34452d06
--- /dev/null
+++ b/word-aligner/ortho-norm/ru.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+#
+# Orthographic normalization for Russian text.
+#
+# Reads UTF-8 lines from STDIN and prints each one to STDOUT lower-cased,
+# with Cyrillic letters replaced by rough Latin equivalents.
+#
+use strict;
+use utf8;    # the table below contains literal Cyrillic characters
+binmode(STDIN,":utf8");
+binmode(STDOUT,":utf8");
+
+# Upper-case Cyrillic letter => Latin replacement; '' drops the letter.
+my %latin_for = (
+    'А' => 'a',  'Б' => 'b',  'В' => 'v',  'Г' => 'g',  'Д' => 'd',
+    'Е' => 'e',  'Ж' => 'zh', 'З' => 'z',  'И' => 'i',  'І' => 'i',
+    'Й' => 'i',  'К' => 'k',  'Л' => 'l',  'М' => 'm',  'Н' => 'n',
+    'О' => 'o',  'П' => 'p',  'Р' => 'r',  'С' => 's',  'Т' => 't',
+    'У' => 'u',  'Ф' => 'f',  'Х' => 'kh', 'Ц' => 'c',  'Ч' => 'ch',
+    'Ш' => 'sh', 'Щ' => 'shch', 'Э' => 'e', 'Я' => 'ya',
+    # Ё is folded together with Е so that both spellings of a word
+    # (with or without the diaeresis) normalize to the same output.
+    'Ё' => 'e',
+    # Ю is romanized as "yu" ("yo" is the sound of Ё, not of Ю).
+    'Ю' => 'yu',
+    # The hard and soft signs are dropped.
+    'Ъ' => '',   'Ь' => '',
+    # NOTE(review): Ы is a vowel, yet it is dropped like the signs above.
+    # Kept as-is to avoid changing existing output; confirm it is intended.
+    'Ы' => '',
+);
+
+# Body of a character class matching exactly the letters in the table.
+my $cyrillic = join '', sort keys %latin_for;
+
+while (my $line = <STDIN>) {
+    # Upper-case first so that lower-case Cyrillic input hits the table keys.
+    $line = uc $line;
+    $line =~ s/([$cyrillic])/$latin_for{$1}/g;
+    # Lower-case whatever the table did not cover (Latin letters, etc.).
+    $line = lc $line;
+    print $line;
+}
+