summaryrefslogtreecommitdiff
path: root/sa-extract/escape-testset.pl
blob: 02fd7445c2a4d9a94982aafff5a30fb514d8dc94 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/perl -w

use utf8;
use strict;

# Treat the standard streams as UTF-8 text so that non-ASCII test-set
# content is decoded on input and encoded on output.
binmode(STDIN,":utf8");
binmode(STDOUT,":utf8");

# Build the list of input handles: STDIN when no arguments are given,
# otherwise one read handle per file named on the command line, in order.
my @fh = ();
if (scalar @ARGV == 0) {
  push @fh, \*STDIN;
} else {
  for my $file (@ARGV) {
    # Keep the conventional "-" = standard input behaviour that the
    # two-argument open used to provide implicitly.
    if ($file eq '-') {
      push @fh, \*STDIN;
      next;
    }
    # Three-argument open: the file name is taken literally, so names with
    # leading/trailing whitespace or a leading "&" are not misinterpreted
    # as part of the open mode.  The :utf8 layer is applied at open time.
    open my $f, '<:utf8', $file or die "Can't read $file: $!\n";
    push @fh, $f;
  }
}

# Wrap every input line in a numbered <seg> element, escaping the XML
# metacharacters &, < and >.  Segment ids start at 0 and keep counting
# across all input handles.  Blank lines and lines that already begin with
# a <seg tag abort the run.
my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
my $id = -1;
for my $f (@fh) {
  while (defined(my $line = <$f>)) {
    chomp $line;
    if ($line =~ /^\s*$/) {
      die "Empty line in test set";
    }
    if ($line =~ /^\s*<seg/i) {
      die "Please remove <seg> tags from input:\n$line";
    }
    ++$id;
    # One pass over the line: each metacharacter is replaced by its entity,
    # and the '&' of an inserted entity is never re-escaped.
    $line =~ s/([&<>])/$entity_for{$1}/g;
    print '<seg id="' . $id . '"> ' . $line . " </seg>\n";
  }
}