summary refs log tree commit diff
path: root/word-aligner/ortho-norm/fr.pl
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 05:12:27 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 05:12:27 +0000
commit 7cc92b65a3185aa242088d830e166e495674efc9 (patch)
tree 681fe5237612a4e96ce36fb9fabef00042c8ee61 /word-aligner/ortho-norm/fr.pl
parent 37728b8be4d0b3df9da81fdda2198ff55b4b2d91 (diff)
initial checkin
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@2 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'word-aligner/ortho-norm/fr.pl')
-rwxr-xr-x word-aligner/ortho-norm/fr.pl 22
1 files changed, 22 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/fr.pl b/word-aligner/ortho-norm/fr.pl
new file mode 100755
index 00000000..5592ab05
--- /dev/null
+++ b/word-aligner/ortho-norm/fr.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+use strict;
+use utf8;
+
+binmode STDIN,  ':utf8';   # read input as UTF-8 so accented letters arrive as single characters
+binmode STDOUT, ':utf8';   # write the normalized text back out as UTF-8
+
+while (defined(my $line = <STDIN>)) {   # one input line per pass, until end of input
+ $line = lc $line;                      # fold case first: the patterns below are lowercase only
+ # Rewrite each circumflexed vowel as the plain vowel followed by "s"; see http://en.wikipedia.org/wiki/Use_of_the_circumflex_in_French
+ $line =~ s/â/as/g;
+ $line =~ s/ê/es/g;
+ $line =~ s/î/is/g;
+ $line =~ s/ô/os/g;
+ $line =~ s/û/us/g;
+ # The cedilla and the acute/grave accents are dropped, leaving the bare letter.
+ $line =~ s/ç/c/g;
+ $line =~ s/é|è/e/g;
+ $line =~ s/á/a/g;
+ print $line;
+}
+