diff options
Diffstat (limited to 'gi/pipeline/scripts/remove-tags-from-corpus.pl')
-rwxr-xr-x | gi/pipeline/scripts/remove-tags-from-corpus.pl | 44 |
1 files changed, 0 insertions, 44 deletions
diff --git a/gi/pipeline/scripts/remove-tags-from-corpus.pl b/gi/pipeline/scripts/remove-tags-from-corpus.pl deleted file mode 100755 index be3e97c0..00000000 --- a/gi/pipeline/scripts/remove-tags-from-corpus.pl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -use Getopt::Long "GetOptions"; - -my $LANGUAGE = shift @ARGV; -$LANGUAGE = 'target' unless ($LANGUAGE); - -my $lno = 0; -while(my $line = <>) { - $lno++; - chomp $line; - - my @fields = split / \|\|\| /, $line; - - if ($LANGUAGE eq "source" or $LANGUAGE eq "both") { - my @cwords = split /\s+/, $fields[0]; - foreach my $token (@cwords) { - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - $token = $parts[0] - } else { - print STDERR "WARNING: invalid tagged token $token\n"; - } - } - $fields[0] = join ' ', @cwords; - } - - if ($LANGUAGE eq "target" or $LANGUAGE eq "both") { - my @cwords = split /\s+/, $fields[1]; - foreach my $token (@cwords) { - my @parts = split /_(?!.*_)/, $token; - if (scalar @parts == 2) { - $token = $parts[1] - } else { - print STDERR "WARNING: invalid tagged token $token\n"; - } - } - $fields[0] = join ' ', @cwords; - } - - print join ' ||| ', @fields; - print "\n"; -} |