diff options
| -rwxr-xr-x | corpus/moses-xml.pl | 36 | 
1 files changed, 36 insertions, 0 deletions
#!/usr/bin/perl -w

# corpus/moses-xml.pl
#
# Stream filter that escapes, or reverses the escaping of, the characters
# that are special to Moses: XML markup characters, the factor separator
# '|', and the syntax non-terminal brackets '[' and ']'.
#
# Usage: moses-xml.pl (escape|unescape) < input > output
#
# Exactly one argument is required. Input is read from STDIN line by line
# and the converted line is written to STDOUT. Any other invocation prints
# the usage message and exits via die.

use strict;
use warnings;

$|++;    # autoflush STDOUT

my $msg = "Usage: $0 (escape|unescape)\n\n  Escapes XMl entities and other special characters for use with Moses.\n\n";

die $msg unless scalar @ARGV == 1;

if ($ARGV[0] eq "escape") {
    while (<STDIN>) {
        # '&' has to be handled first: every entity produced below contains
        # an '&', and escaping it afterwards would double-escape them.
        s/&/&amp;/g;      # escape escape
        s/\|/&#124;/g;    # factor separator
        s/</&lt;/g;       # xml
        s/>/&gt;/g;       # xml
        s/'/&apos;/g;     # xml
        s/"/&quot;/g;     # xml
        s/\[/&#91;/g;     # syntax non-terminal
        s/\]/&#93;/g;     # syntax non-terminal
        print;
    }
} elsif ($ARGV[0] eq "unescape") {
    while (<STDIN>) {
        s/&#124;/|/g;     # factor separator
        s/&lt;/</g;       # xml
        s/&gt;/>/g;       # xml
        s/&apos;/'/g;     # xml
        s/&quot;/"/g;     # xml
        s/&#91;/[/g;      # syntax non-terminal
        s/&#93;/]/g;      # syntax non-terminal
        # '&amp;' has to be handled last: an escaped literal such as
        # "&amp;lt;" must come back as the text "&lt;", not as "<".
        s/&amp;/&/g;      # escape escape
        print;
    }
} else {
    die $msg;
}
