summaryrefslogtreecommitdiff
path: root/word-aligner/ortho-norm/fr.pl
blob: 5592ab05a7d8156b1df08383f9c6916044c8bf1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/usr/bin/perl -w
use strict;
use utf8;

# Read and write UTF-8 text.  The strict ":encoding(UTF-8)" layer is used
# rather than the lax ":utf8" layer, which marks incoming bytes as character
# data without checking that they are well-formed UTF-8.
binmode(STDIN,  ":encoding(UTF-8)");
binmode(STDOUT, ":encoding(UTF-8)");

# Orthographically normalize a piece of French text.
#
# The text is lowercased, and then:
#   * a circumflexed vowel is rewritten as the plain vowel followed by "s"
#     (e.g. "forêt" -> "forest"); for the rationale see
#     http://en.wikipedia.org/wiki/Use_of_the_circumflex_in_French
#   * "ç", "é", "è", "à" and "á" are replaced by their unaccented letter.
# All other characters (including other accented letters such as "ù" or "ï")
# are passed through unchanged.
#
# Takes the text as its only argument and returns the normalized copy; the
# caller's string is not modified.
sub normalize_fr {
  my ($text) = @_;

  # Lowercase first so that the patterns below only need the lowercase forms.
  $text = lc $text;

  $text =~ s/â/as/g;
  $text =~ s/ê/es/g;
  $text =~ s/î/is/g;
  $text =~ s/ô/os/g;
  $text =~ s/û/us/g;

  $text =~ s/ç/c/g;
  $text =~ s/[éè]/e/g;
  # "à" is the accented "a" that French actually uses ("à", "déjà", "voilà").
  # "á" is not a French letter but is still folded so that input which was
  # normalized before keeps normalizing the same way.
  $text =~ s/[àá]/a/g;

  return $text;
}

# Filter mode: normalize STDIN line by line and write the result to STDOUT.
# Line terminators are left in place, so output lines stay aligned 1:1 with
# input lines.
while (my $line = <STDIN>) {
  print normalize_fr($line);
}