blob: fca63aa8a1c0bcf516518a4c5f4401d59093d2c8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
#!/usr/bin/perl -w
#
# Line-by-line filter that escapes, or unescapes, the characters that are
# special to Moses. Input is read from STDIN and the converted text is
# written to STDOUT.
#
# Usage: <script> (escape|unescape)
#
#   escape     rewrites   &     |      <    >    '      "      [     ]
#              as         &amp; &#124; &lt; &gt; &apos; &quot; &#91; &#93;
#   unescape   applies the reverse mapping
#
# Exactly one argument must be given. With no argument, more than one
# argument, or an unknown mode, the script dies with the usage message.

use strict;

$|++;    # autoflush the selected output handle (STDOUT) after every print

my $msg = "Usage: $0 (escape|unescape)\n\n Escapes XMl entities and other special characters for use with Moses.\n\n";

die $msg unless scalar @ARGV == 1;

if ($ARGV[0] eq "escape") {
    while (<STDIN>) {
        # '&' has to be handled first: every entity produced below contains
        # an ampersand, which must not be escaped a second time.
        s/&/&amp;/g;       # escape escape
        s/\|/&#124;/g;     # factor separator
        s/</&lt;/g;        # xml
        s/>/&gt;/g;        # xml
        s/'/&apos;/g;      # xml
        s/"/&quot;/g;      # xml
        s/\[/&#91;/g;      # syntax non-terminal
        s/\]/&#93;/g;      # syntax non-terminal
        print;
    }
} elsif ($ARGV[0] eq "unescape") {
    while (<STDIN>) {
        s/&#124;/|/g;      # factor separator
        s/&lt;/</g;        # xml
        s/&gt;/>/g;        # xml
        s/&apos;/'/g;      # xml
        s/&quot;/"/g;      # xml
        s/&#91;/[/g;       # syntax non-terminal
        s/&#93;/]/g;       # syntax non-terminal
        # '&amp;' has to be handled last, mirroring the escape order, so that
        # an escaped literal such as "&amp;lt;" becomes "&lt;" and not "<".
        s/&amp;/&/g;       # escape escape
        print;
    }
} else {
    die $msg;
}
|