summary | refs | log | tree | commit | diff
path: root/word-aligner/ortho-norm/fr.pl
diff options
context:
space:
mode:
author: Chris Dyer <redpony@gmail.com> 2010-01-29 15:56:59 +0000
committer: Chris Dyer <redpony@gmail.com> 2010-01-29 15:56:59 +0000
commit: da222df300e4f87ad185a7decbf119ad56aa34e0 (patch)
tree: 1137deefefd28b1a89f6b2b339883801cc12cb29 /word-aligner/ortho-norm/fr.pl
parent: ee4383b3bc67e2d8ce113fce716050dc2e1b8572 (diff)
word aligner checkin
Diffstat (limited to 'word-aligner/ortho-norm/fr.pl')
-rwxr-xr-x word-aligner/ortho-norm/fr.pl | 22
1 files changed, 22 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/fr.pl b/word-aligner/ortho-norm/fr.pl
new file mode 100755
index 00000000..5592ab05
--- /dev/null
+++ b/word-aligner/ortho-norm/fr.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+use strict;
+use utf8;
+
+binmode(STDIN, ":encoding(UTF-8)");  # strict decode; the bare :utf8 layer does not validate input bytes
+binmode(STDOUT, ":encoding(UTF-8)");
+# Filter: read STDIN line by line, lowercase it, rewrite accented letters (below), print to STDOUT.
+while (my $line = <STDIN>) {
+ $line = lc $line;
+ # circumflex vowel -> plain vowel + "s"; see http://en.wikipedia.org/wiki/Use_of_the_circumflex_in_French
+ $line =~ s/â/as/g;
+ $line =~ s/ê/es/g;
+ $line =~ s/î/is/g;
+ $line =~ s/ô/os/g;
+ $line =~ s/û/us/g;
+ # Only ç, é, è, á lose their diacritic; NOTE(review): à, ù, ë, ï, ü pass through unchanged - confirm intended.
+ $line =~ s/ç/c/g;
+ $line =~ s/[éè]/e/g;
+ $line =~ s/á/a/g;
+ print $line;
+}
+