summaryrefslogtreecommitdiff
path: root/gi/pipeline/filter-for-test-set.pl
blob: f78501d27b01b0e1ada8b3bc1d8857e7038f01cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/perl -w
use strict;
my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }

# External decompression/compression commands. Only $ZCAT is referenced by
# the code visible in this script; $GZIP is kept for parity with it.
my $GZIP = 'gzip';
my $ZCAT = 'gunzip -c';

# The extools directory is located relative to this script's own directory.
my $EXTOOLS = "$SCRIPT_DIR/../../extools";
-d $EXTOOLS or die "Can't find extools: $EXTOOLS";

# Paths of the two extools binaries that make up the pipeline.
my ($FILTER, $SCORE) = map { "$EXTOOLS/$_" } qw(filter_grammar score_grammar);

# Abort early if either binary is missing.
assert_exec($FILTER, $SCORE);

# Exactly three positional arguments are required: the corpus, the (gzipped)
# grammar and the test set. Anything else prints the usage text and exits.
usage() unless scalar @ARGV == 3;
my ($corpus, $grammar, $testset) = @ARGV;

# Each input must be an existing plain file; name the offending input
# correctly so the user knows which argument is wrong.
die "Can't find corpus: $corpus" unless -f $corpus;
die "Can't find grammar: $grammar" unless -f $grammar;
die "Can't find test set: $testset" unless -f $testset;

# Echo the resolved inputs to STDERR (STDOUT carries the pipeline output).
print STDERR "  CORPUS: $corpus\n";
print STDERR " GRAMMAR: $grammar\n";
print STDERR "TEST SET: $testset\n";
print STDERR "Extracting...\n";

# Decompress the grammar, filter it against the test set, then score it
# against the corpus.
# NOTE(review): the command is a single string run through the shell, so paths
# containing spaces or shell metacharacters will be mis-parsed, and only the
# exit status of the last pipeline stage is normally reported to safesystem.
safesystem("$ZCAT $grammar | $FILTER $testset | $SCORE $corpus") or die "Failed";

# Print the command-line synopsis and terminate with exit status 1.
#
# The text is written to STDERR, not STDOUT: per the synopsis below, STDOUT is
# expected to be redirected into the output grammar file, so a usage message
# printed there would be invisible to the user and would end up in that file.
sub usage {
  print STDERR <<EOT;

Usage: $0 corpus.src_trg_al grammar.gz test-set.txt > filtered-grammar.scfg.txt

Filter and score a grammar for a test set.

EOT
  exit 1;
};

# Verify that every path passed in exists and is executable.
#
# Arguments: a list of file paths.
# Dies with "Can't find ..." for the first path that does not exist, or with
# "Can't execute ..." for the first path that exists but lacks execute
# permission. Returns normally when all paths pass both checks.
sub assert_exec {
  my @files = @_;
  for my $file (@files) {
    die "Can't find $file - did you run make?\n" unless -e $file;
    # -x (not -e) is required here; repeating the existence test would make
    # this check unreachable.
    die "Can't execute $file" unless -x $file;
  }
};

# Run an external command with system() and report whether it succeeded.
#
# Arguments: the command, in any form system() accepts (a single string or a
#            program followed by its arguments).
# Returns:   true if the command exited with status 0, false otherwise; a
#            non-zero exit code is logged to STDERR.
# Exits the whole script with status 1 if the command could not be started
# or was terminated by a signal.
sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);
  if ($? == -1) {
      # system() itself failed; $! holds the reason.
      print STDERR "ERROR: Failed to execute: @_\n  $!\n";
      exit(1);
  }
  elsif ($? & 127) {
      # Low 7 bits of $? are the signal number, bit 128 flags a core dump.
      # The command is passed as a %s argument rather than interpolated into
      # the format, so a '%' in the command is printed literally.
      printf STDERR "ERROR: Execution of: %s\n  died with signal %d, %s coredump\n",
          "@_", ($? & 127),  ($? & 128) ? 'with' : 'without';
      exit(1);
  }
  else {
    # Normal termination: the exit code is in the high byte of $?.
    my $exitcode = $? >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return ! $exitcode;
  }
}