diff options
| author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-03-11 17:06:38 -0400 | 
|---|---|---|
| committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2013-03-11 17:06:38 -0400 | 
| commit | 33e25664131ddd9507b1c9eef2fb238e1e8840a1 (patch) | |
| tree | 4902fa1e0666e53c6783c1affb7a723a507f8d30 /word-aligner/ortho-norm | |
| parent | bd65d6a4492e172a7840c010c5414ceb6f6acd56 (diff) | |
russian ortho norm
Diffstat (limited to 'word-aligner/ortho-norm')
| -rwxr-xr-x | word-aligner/ortho-norm/ru.pl | 44 | 
1 files changed, 44 insertions, 0 deletions
#!/usr/bin/perl -w
# word-aligner/ortho-norm/ru.pl
#
# Orthographic normalizer for Russian text.
#
# Reads UTF-8 lines from STDIN and writes one normalized line to STDOUT per
# input line.  Every Cyrillic letter in the table below is replaced by a
# lower-case Latin key, and everything else is lower-cased.  The mapping is
# intentionally lossy (e.g. И, Й and І all become "i"; hard and soft signs
# are dropped): the output is a coarse spelling key, not a reversible
# transliteration.
use strict;
use utf8;    # the mapping table below contains literal Cyrillic characters

# Upper-case Cyrillic letter => Latin replacement.  Only upper-case keys are
# needed because normalize_ru() upper-cases its input before the lookup.
my %LATIN_FOR = (
    'А' => 'a',
    'І' => 'i',       # not a modern Russian letter; kept from the original table
    'Б' => 'b',
    'В' => 'v',
    'Г' => 'g',
    'Д' => 'd',
    'Е' => 'e',
    'Ё' => 'e',       # Ё is routinely spelled Е in Russian text, so both share a key
    'Ж' => 'zh',
    'З' => 'z',
    'И' => 'i',
    'Й' => 'i',
    'К' => 'k',
    'Л' => 'l',
    'М' => 'm',
    'Н' => 'n',
    'О' => 'o',
    'П' => 'p',
    'Р' => 'r',
    'С' => 's',
    'Т' => 't',
    'У' => 'u',
    'Ф' => 'f',
    'Х' => 'kh',
    'Ц' => 'c',
    'Ч' => 'ch',
    'Ш' => 'sh',
    'Щ' => 'shch',
    'Ъ' => '',        # hard sign: dropped
    'Ы' => '',        # NOTE(review): Ы is a vowel but is dropped here exactly as
                      # in the original table -- confirm this is intended.
    'Ь' => '',        # soft sign: dropped
    'Э' => 'e',
    'Ю' => 'yu',      # was "yo", which is the romanization of Ё, not Ю
    'Я' => 'ya',
);

# One character class matching every key of %LATIN_FOR, so a line is
# rewritten in a single pass.  No replacement contains a Cyrillic letter,
# so one pass gives the same result as applying the rules one after another.
my $CYRILLIC_RE = do {
    my $letters = join '', sort keys %LATIN_FOR;
    qr/([$letters])/;
};

# normalize_ru($text)
#
# Returns the normalized form of $text (a decoded character string).
# Line terminators and characters without a table entry pass through,
# lower-cased.  $text itself is not modified.
sub normalize_ru {
    my ($text) = @_;

    # Fold case first so lower- and upper-case Cyrillic hit the same keys.
    $text = uc $text;
    $text =~ s/$CYRILLIC_RE/$LATIN_FOR{$1}/g;

    # Lower-case whatever was not in the table (Latin letters, etc.).
    return lc $text;
}

# main()
#
# Filter driver: normalizes STDIN to STDOUT line by line.
sub main {
    # :encoding(UTF-8) validates the byte stream; the bare :utf8 layer
    # would accept malformed sequences unchecked.
    binmode(STDIN,  ':encoding(UTF-8)');
    binmode(STDOUT, ':encoding(UTF-8)');

    while (my $line = <STDIN>) {
        print normalize_ru($line);
    }
    return;
}

# Run as a filter when executed directly; stay quiet when loaded via
# require/do so normalize_ru() can be called on its own.
main() unless caller;
