diff options
author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-11-06 00:02:58 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-11-06 00:02:58 -0500 |
commit | d8d8dad6e63306b3c8e326b22fcfdf90856bb85e (patch) | |
tree | b4dc354bdb2080e110a8f9067d108cbacab5b3b8 /corpus | |
parent | 552793bbd50f634ea755b84d47ddcc6cd4f158f2 (diff) | |
parent | 9e5107a05bfabb76ce547d2849173c5a11aeba60 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'corpus')
-rwxr-xr-x | corpus/add-sos-eos.pl | 24 |
1 files changed, 24 insertions, 0 deletions
#!/usr/bin/perl -w
# corpus/add-sos-eos.pl
#
# Filter that wraps both sides of a word-aligned sentence pair in <s> ... </s>
# markers and adjusts the alignment links to match.
#
# Input (files named on the command line, or STDIN), one pair per line:
#     foreign ||| target ||| alignments
# where alignments is a whitespace-separated list of "i-j" index pairs.
#
# Output, one line per input line:
#     <s> foreign </s> ||| <s> target </s> ||| 0-0 shifted-links... F-E
# Every input link is shifted by one on both sides (the new <s> occupies
# position 0), a 0-0 link ties the two <s> markers together, and a final
# link ties the two </s> markers together.
use strict;
use warnings;

# Transform one chomped input line into its <s>/</s>-wrapped form.
#   $line   - "foreign ||| target ||| alignments" (no trailing newline)
# Returns the output line without a trailing newline.
# Dies if the line does not have exactly three ' ||| '-separated fields or if
# an alignment link is not of the form <digits>-<digits>.
sub add_sos_eos {
    my ($line) = @_;

    # LIMIT -1 keeps a trailing empty field, so a pair with an empty
    # alignment column ("f ||| e ||| ") is accepted rather than rejected.
    my @fields = split / \|\|\| /, $line, -1;
    die "Expected: foreign ||| target ||| alignments" unless @fields == 3;
    my ($foreign, $target, $alignments) = @fields;

    # split ' ' (awk mode) ignores leading whitespace; split /\s+/ would
    # produce a leading empty token and inflate the sentence lengths.
    my @foreign_tokens = split ' ', $foreign;
    my @target_tokens  = split ' ', $target;
    my @links          = split ' ', $alignments;

    my @shifted = ('0-0');    # <s> aligns to <s>
    for my $link (@links) {
        my ($src, $tgt) = $link =~ /\A(\d+)-(\d+)\z/
            or die "Bad format in: @links";
        push @shifted, ($src + 1) . '-' . ($tgt + 1);
    }

    # </s> sits one past the last (shifted) token on each side.
    push @shifted, (@foreign_tokens + 1) . '-' . (@target_tokens + 1);

    return "<s> $foreign </s> ||| <s> $target </s> ||| @shifted";
}

# Act as a line filter only when executed as a script, so the sub above can
# also be loaded and called from other code.
unless (caller) {
    while (my $line = <>) {
        chomp $line;
        print add_sos_eos($line), "\n";
    }
}