summaryrefslogtreecommitdiff
path: root/word-aligner
diff options
context:
space:
mode:
author Paul Baltescu <pauldb89@gmail.com> 2013-04-24 17:18:10 +0100
committer Paul Baltescu <pauldb89@gmail.com> 2013-04-24 17:18:10 +0100
commit ba206aaac1d95e76126443c9e7ccc5941e879849 (patch)
tree 13a918da3f3983fd8e4cb74e7cdc3f5e1fc01cd1 /word-aligner
parent c2aede0f19b7a5e43581768b8c4fbfae8b92c68c (diff)
parent db960a8bba81df3217660ec5a96d73e0d6baa01b (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'word-aligner')
-rwxr-xr-x word-aligner/ortho-norm/ru.pl 44
1 files changed, 44 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/ru.pl b/word-aligner/ortho-norm/ru.pl
new file mode 100755
index 00000000..34452d06
--- /dev/null
+++ b/word-aligner/ortho-norm/ru.pl
@@ -0,0 +1,44 @@
+#!/usr/bin/perl -w
+# Rough Latin transliteration of Russian text: UTF-8 lines in on STDIN,
+# lowercased, romanized lines out on STDOUT.
+use strict;
+use utf8;
+binmode(STDIN,":utf8");
+binmode(STDOUT,":utf8");
+while(<STDIN>) {
+ # Uppercase first so every Cyrillic letter needs only one pattern.
+ $_ = uc $_;
+ s/А/a/g;
+ s/Б/b/g;
+ s/В/v/g;
+ s/Г/g/g;
+ s/Д/d/g;
+ s/[ЕЁЭ]/e/g; # Ё is routinely printed as Е, so fold it the same way
+ s/Ж/zh/g;
+ s/З/z/g;
+ s/[ИЙІ]/i/g; # also folds the dotted І
+ s/К/k/g;
+ s/Л/l/g;
+ s/М/m/g;
+ s/Н/n/g;
+ s/О/o/g;
+ s/П/p/g;
+ s/Р/r/g;
+ s/С/s/g;
+ s/Т/t/g;
+ s/У/u/g;
+ s/Ф/f/g;
+ s/Х/kh/g;
+ s/Ц/c/g;
+ s/Ч/ch/g;
+ s/Ш/sh/g;
+ s/Щ/shch/g;
+ # The hard and soft signs are dropped from the output.
+ # NOTE(review): Ы is dropped as well; it is usually romanized "y" -- confirm intent.
+ s/[ЪЫЬ]//g;
+ s/Ю/yu/g; # Ю is "yu"; "yo" would be the romanization of Ё
+ s/Я/ya/g;
+ $_ = lc $_; # lowercase whatever was not transliterated above
+ print;
+}
+