diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-03-11 17:06:38 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-03-11 17:06:38 -0400 |
commit | 33e25664131ddd9507b1c9eef2fb238e1e8840a1 (patch) | |
tree | 4902fa1e0666e53c6783c1affb7a723a507f8d30 /word-aligner | |
parent | bd65d6a4492e172a7840c010c5414ceb6f6acd56 (diff) |
russian ortho norm
Diffstat (limited to 'word-aligner')
-rwxr-xr-x | word-aligner/ortho-norm/ru.pl | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/ru.pl b/word-aligner/ortho-norm/ru.pl new file mode 100755 index 00000000..34452d06 --- /dev/null +++ b/word-aligner/ortho-norm/ru.pl @@ -0,0 +1,44 @@ +#!/usr/bin/perl -w +use strict; +use utf8; +binmode(STDIN,":utf8"); +binmode(STDOUT,":utf8"); +while(<STDIN>) { + $_ = uc $_; + s/А/a/g; + s/І/i/g; + s/Б/b/g; + s/В/v/g; + s/Г/g/g; + s/Д/d/g; + s/Е/e/g; + s/Ж/zh/g; + s/З/z/g; + s/И/i/g; + s/Й/i/g; + s/К/k/g; + s/Л/l/g; + s/М/m/g; + s/Н/n/g; + s/О/o/g; + s/П/p/g; + s/Р/r/g; + s/С/s/g; + s/Т/t/g; + s/У/u/g; + s/Ф/f/g; + s/Х/kh/g; + s/Ц/c/g; + s/Ч/ch/g; + s/Ш/sh/g; + s/Щ/shch/g; + s/Ъ//g; + s/Ы//g; + s/Ь//g; + s/Э/e/g; + s/Ю/yo/g; + s/Я/ya/g; + $_ = lc $_; + print; +} + |