blob: 0c5b9c260875475dcf9936b3bec867ef991481a3 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
#!/usr/bin/perl -w
#
# Punctuation / entity normalizer.
#
# Reads UTF-8 text from STDIN one line at a time, rewrites a fixed set of
# HTML entities, currency symbols, quotation marks, dashes and CJK/Arabic
# punctuation to plain ASCII-style equivalents, collapses whitespace, and
# writes the normalized line to STDOUT.
use strict;
use warnings;
use utf8;    # the substitution patterns below contain UTF-8 literals

binmode(STDIN,  ':utf8');
binmode(STDOUT, ':utf8');

# normalize_line($text) -> $normalized
#
# Applies every normalization rule to a single line (without its trailing
# newline) and returns the result.  The rules are order-dependent: entities
# are decoded first, currency is handled before quotes, and whitespace is
# collapsed near the end.  The input string is not modified.
sub normalize_line {
    my ($text) = @_;

    # Pad with spaces so rules anchored on a preceding space or \W also
    # fire at the very start of the line; the padding is trimmed again below.
    my $line = " $text ";

    # Decode (possibly space-damaged) HTML entities.
    $line =~ s/&\s*lt\s*;/</gi;
    $line =~ s/&\s*gt\s*;/>/gi;
    $line =~ s/&\s*squot\s*;/'/gi;
    $line =~ s/&\s*quot\s*;/"/gi;
    $line =~ s/&\s*amp\s*;/&/gi;

    # Re-join times that were split after the colon: "10: 30" -> "10:30".
    $line =~ s/ (\d\d): (\d\d)/ $1:$2/g;

    # Currency symbols -> ISO-style codes.
    # U+20A0 EURO-CURRENCY SIGN and U+20AC EURO SIGN.  (The original pattern
    # had a stray ']' that split the character class, so a lone euro sign
    # was never matched.)
    $line =~ s/[\x{20a0}\x{20ac}]/ EUR /g;
    $line =~ s/[\x{00A3}]/ GBP /g;    # U+00A3 POUND SIGN

    # Separate an upper-case prefix (optionally ending in '$') or "eur"/"euro"
    # from a directly attached number: "USD5.50" -> "USD 5.50".
    $line =~ s/(\W)([A-Z]+\$?)(\d*\.\d+|\d+)/$1$2 $3/g;
    $line =~ s/(\W)(euro?)(\d*\.\d+|\d+)/${1}EUR $3/gi;

    # Numeric entity for '-' (&#45;).
    $line =~ s/&\s*#45\s*;\s*&\s*#45\s*;/--/g;
    # NOTE(review): a single &#45; also becomes "--", not "-"; kept as in
    # the original -- confirm this is intended.
    $line =~ s/&\s*#45\s*;/--/g;

    # Double-quote look-alikes -> ASCII double quote.
    $line =~ s/ ,,/ "/g;
    $line =~ s/``/"/g;
    $line =~ s/''/"/g;
    $line =~ s/〃/"/g;
    $line =~ s/¨/"/g;

    # Set inverted Spanish punctuation off with spaces.
    $line =~ s/¡/ ¡ /g;
    $line =~ s/¿/ ¿ /g;

    # Apostrophe / single-quote look-alikes -> ASCII apostrophe.
    $line =~ s/ˇ/'/g;
    $line =~ s/´/'/g;
    $line =~ s/`/'/g;
    $line =~ s/’/'/g;
    $line =~ s/ ́/'/g;
    $line =~ s/‘/'/g;
    $line =~ s/ˉ/'/g;

    $line =~ s/β/ß/g;    # WMT 2010 error

    # Typographic double quotes and guillemets -> ASCII double quote.
    $line =~ s/“/"/g;
    $line =~ s/”/"/g;
    $line =~ s/«/"/g;
    $line =~ s/»/"/g;

    # Fullwidth forms U+FF01..U+FF5E -> ASCII '!'..'~'.  The original line
    # mapped '!'-'~' onto itself (a no-op); explicit code points are used
    # here so the rule cannot be flattened by re-encoding the source file.
    $line =~ tr/\x{FF01}-\x{FF5E}/!-~/;

    # CJK punctuation, ellipsis, dashes and bullets.
    $line =~ s/、/,/g;
    $line =~ s/。/./g;
    $line =~ s/…/.../g;
    $line =~ s/―/--/g;
    $line =~ s/–/--/g;
    $line =~ s/─/--/g;
    $line =~ s/—/--/g;
    $line =~ s/•/ * /g;
    $line =~ s/\*/ * /g;

    # Arabic comma, question mark and tatweel.
    $line =~ s/،/,/g;
    $line =~ s/؟/?/g;
    $line =~ s/ـ/ /g;

    # Leftovers that look like mis-decoded (mojibake) sequences.
    $line =~ s/Ã ̄/i/g;
    # NOTE(review): the same character was already replaced above, so this
    # rule cannot match here; possibly a mojibake pattern originally.
    $line =~ s/’/'/g;
    $line =~ s/â€"/"/g;

    $line =~ s/؛/;/g;    # Arabic semicolon

    # Collapse whitespace runs and trim the padding added at the top.
    $line =~ s/\s+/ /g;
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;

    # Drop remaining ASCII control characters.  NOTE(review): this runs after
    # the collapse/trim, so a removed control character can leave a doubled
    # or leading/trailing space behind; order kept as in the original.
    $line =~ s/[\x{00}-\x{1f}]//g;

    return $line;
}

while (my $input = <STDIN>) {
    chomp $input;
    print normalize_line($input), "\n";
}
|