blob: f78501d27b01b0e1ada8b3bc1d8857e7038f01cd (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
|
#!/usr/bin/perl -w
use strict;
my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path cwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
# Top-level driver: locate the extools binaries, validate the three
# command-line arguments, then stream the grammar through the filter and
# scorer.  The pipeline inherits this script's STDOUT.

# External commands for compressing / decompressing files.
my $GZIP = 'gzip';
my $ZCAT = 'gunzip -c';

# The extools directory is resolved relative to this script's location.
my $EXTOOLS = "$SCRIPT_DIR/../../extools";
die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
my $FILTER = "$EXTOOLS/filter_grammar";
my $SCORE = "$EXTOOLS/score_grammar";
assert_exec($FILTER, $SCORE);

# Exactly three positional arguments are required: corpus, grammar, test set.
usage() unless scalar @ARGV == 3;
my ($corpus, $grammar, $testset) = @ARGV;

# Each input must be an existing plain file; name the right input in the
# error so the user knows which argument is wrong.
die "Can't find corpus: $corpus" unless -f $corpus;
die "Can't find grammar: $grammar" unless -f $grammar;
die "Can't find test set: $testset" unless -f $testset;

# Echo the resolved inputs (each line reports its own path).
print STDERR " CORPUS: $corpus\n";
print STDERR " GRAMMAR: $grammar\n";
print STDERR "TEST SET: $testset\n";
print STDERR "Extracting...\n";

# NOTE(review): the paths are interpolated into a shell command line
# unquoted, so paths containing spaces or shell metacharacters will break
# the pipeline.
safesystem("$ZCAT $grammar | $FILTER $testset | $SCORE $corpus") or die "Failed";
# Print the command-line synopsis and terminate with exit status 1.
# The text is written to STDERR: as the synopsis itself shows, STDOUT is
# meant to be redirected into the output grammar file, so a usage message
# printed there would end up in that file instead of in front of the user.
sub usage {
    print STDERR <<EOT;
Usage: $0 corpus.src_trg_al grammar.gz test-set.txt > filtered-grammar.scfg.txt
Filter and score a grammar for a test set.
EOT
    exit 1;
}
# Verify that every path passed in exists and is executable.
#
# Arguments: a list of file paths.
# Dies on the first path that does not exist ("did you run make?") or that
# exists but is not executable by the current user; returns normally when
# all paths pass both checks.
sub assert_exec {
    my @files = @_;
    for my $file (@files) {
        die "Can't find $file - did you run make?\n" unless -e $file;
        # -x (not -e): existence was already established above, what is
        # being checked here is execute permission.
        die "Can't execute $file" unless -x $file;
    }
    return;
}
# Run a command via system() and report the outcome on STDERR.
#
# Arguments: passed straight through to system(), so either a single shell
# command string or a program-plus-arguments list.
# Returns: true when the command exited with status 0, false otherwise
# (the non-zero exit code is logged).
# Exits the whole script with status 1 if the command could not be started
# or was terminated by a signal.
sub safesystem {
    print STDERR "Executing: @_\n";
    system(@_);
    my $status = $?;
    if ($status == -1) {
        # system() could not launch the command at all; $! holds the reason.
        print STDERR "ERROR: Failed to execute: @_\n $!\n";
        exit(1);
    }
    elsif ($status & 127) {
        # Low 7 bits: terminating signal; bit 8: core-dump flag.
        # The command text is passed as a %s argument rather than being
        # interpolated into the format string, so a literal '%' in the
        # command cannot be misread as a printf conversion.
        printf STDERR "ERROR: Execution of: %s\n died with signal %d, %s coredump\n",
            "@_", ($status & 127), ($status & 128) ? 'with' : 'without';
        exit(1);
    }
    else {
        # Normal termination: the exit code lives in the high byte.
        my $exitcode = $status >> 8;
        print STDERR "Exit code: $exitcode\n" if $exitcode;
        return ! $exitcode;
    }
}
|