summaryrefslogtreecommitdiff
path: root/word-aligner/ortho-norm/fr.pl
diff options
context:
space:
mode:
Diffstat (limited to 'word-aligner/ortho-norm/fr.pl')
-rwxr-xr-xword-aligner/ortho-norm/fr.pl22
1 files changed, 22 insertions, 0 deletions
diff --git a/word-aligner/ortho-norm/fr.pl b/word-aligner/ortho-norm/fr.pl
new file mode 100755
index 00000000..5592ab05
--- /dev/null
+++ b/word-aligner/ortho-norm/fr.pl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+use strict;
+use utf8;
+
+binmode(STDIN, ":utf8");
+binmode(STDOUT, ":utf8");
+
+while(<STDIN>) {
+ $_ = lc $_;
+ # see http://en.wikipedia.org/wiki/Use_of_the_circumflex_in_French
+ s/â/as/g;
+ s/ê/es/g;
+ s/î/is/g;
+ s/ô/os/g;
+ s/û/us/g;
+
+ s/ç/c/g;
+ s/é|è/e/g;
+ s/á/a/g;
+ print;
+}
+