summaryrefslogtreecommitdiff
path: root/word-aligner/ortho-norm
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-03-11 17:06:38 -0400
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-03-11 17:06:38 -0400
commit e81511381148fcb23e7803675ccacfc3c992c221 (patch)
tree 8be07011c8fd8b38e0429d5bfcdb4e2046ee98cc /word-aligner/ortho-norm
parent c88ad1c8725dcdc97ef9944e9396f10f6053dce5 (diff)
russian ortho norm
Diffstat (limited to 'word-aligner/ortho-norm')
-rwxr-xr-x word-aligner/ortho-norm/ru.pl 44
1 files changed, 44 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/ru.pl b/word-aligner/ortho-norm/ru.pl
new file mode 100755
index 00000000..34452d06
--- /dev/null
+++ b/word-aligner/ortho-norm/ru.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+# Russian orthographic normalization filter: reads UTF-8 lines from STDIN and
+# prints each one to STDOUT lower-cased, with Cyrillic letters replaced by rough
+# ASCII equivalents. The mapping is lossy: several letters share a spelling,
+# and the hard sign, the soft sign and "Ы" are deleted outright.
+use strict;
+use utf8;
+binmode(STDIN,":utf8");
+binmode(STDOUT,":utf8");
+
+# Upper-case Cyrillic letter => lower-case ASCII replacement ('' deletes it).
+# Replacements are ASCII and keys are not, so output is never re-matched.
+my %latin_for = (
+    'А' => 'a',    'Б' => 'b',  'В' => 'v',  'Г' => 'g',  'Д' => 'd',
+    'Е' => 'e',    'Ж' => 'zh', 'З' => 'z',  'И' => 'i',  'Й' => 'i',
+    'К' => 'k',    'Л' => 'l',  'М' => 'm',  'Н' => 'n',  'О' => 'o',
+    'П' => 'p',    'Р' => 'r',  'С' => 's',  'Т' => 't',  'У' => 'u',
+    'Ф' => 'f',    'Х' => 'kh', 'Ц' => 'c',  'Ч' => 'ch', 'Ш' => 'sh',
+    'Щ' => 'shch', 'Ъ' => '',   'Ы' => '',   'Ь' => '',   'Э' => 'e',
+    'Я' => 'ya',   'І' => 'i',
+    # "Ё" is commonly printed as "Е" in running text, so both become "e".
+    'Ё' => 'e',
+    # "Ю" romanizes as "yu"; "yo" would be the reading of "Ё".
+    'Ю' => 'yu',
+);
+
+# Character class matching exactly the letters listed in %latin_for.
+my $cyrillic_re = do { my $set = join '', sort keys %latin_for; qr/[$set]/ };
+
+# Returns $text (a decoded character string) lower-cased, with every letter
+# listed in %latin_for replaced by its ASCII spelling.
+sub ortho_norm_ru {
+    my ($text) = @_;
+    # Fold to upper case first so one table covers both letter cases.
+    $text = uc $text;
+    $text =~ s/($cyrillic_re)/$latin_for{$1}/g;
+    # Lower-case whatever the table did not touch (Latin text, other letters).
+    return lc $text;
+}
+
+while (my $line = <STDIN>) {
+    print ortho_norm_ru($line);
+}
+