summaryrefslogtreecommitdiff
path: root/corpus/add-sos-eos.pl
blob: 5e2d44cb7ccec8fae7e88065807ef2ffadf8703d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/usr/bin/perl -w
use strict;

# Wraps one parallel-corpus line in sentence-boundary markers.
#
# Input:  a chomped line of the form "foreign ||| target ||| alignments",
#         where alignments is a whitespace-separated list of "i-j" pairs of
#         non-negative integers (the list may be empty).
# Output: the line with "<s>" / "</s>" added around both sides, every
#         alignment point shifted by one on each side to make room for "<s>",
#         and two extra points added: "0-0" for the "<s>" markers and
#         "(flen+1)-(elen+1)" for the "</s>" markers.  No trailing newline.
# Dies:   if the line does not have exactly three fields, or if an alignment
#         point is not of the form "<digits>-<digits>".
sub add_sos_eos {
  my ($line) = @_;

  # LIMIT -1 keeps a trailing empty field, so a sentence pair that has no
  # alignment points ("f ||| e ||| ") is still seen as three fields.
  my @fields = split / \|\|\| /, $line, -1;
  die "Expected: foreign ||| target ||| alignments" unless scalar @fields == 3;
  my ($ff, $ee, $aa) = @fields;

  # split ' ' (awk mode) skips leading whitespace; split /\s+/ would yield an
  # empty first token there and inflate the lengths by one.
  my $flen = scalar(my @fs = split ' ', $ff);
  my $elen = scalar(my @es = split ' ', $ee);
  my @points = split ' ', $aa;

  my @oas = ('0-0');
  for my $point (@points) {
    # Named $f_idx/$e_idx rather than $a/$b, which are reserved for sort.
    my ($f_idx, $e_idx) = $point =~ /\A(\d+)-(\d+)\z/
      or die "Bad format in: @points";
    push @oas, ($f_idx + 1) . '-' . ($e_idx + 1);
  }
  push @oas, ($flen + 1) . '-' . ($elen + 1);

  return "<s> $ff </s> ||| <s> $ee </s> ||| " . join(' ', @oas);
}

# Filter: read lines from the files named in @ARGV (or STDIN) and print the
# marked-up version of each one.
while (my $line = <>) {
  chomp $line;
  print add_sos_eos($line), "\n";
}