summary | refs | log | tree | commit | diff
path: root/gi/pipeline/evaluation-pipeline.pl
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline/evaluation-pipeline.pl')
-rwxr-xr-x gi/pipeline/evaluation-pipeline.pl 9
1 files changed, 6 insertions, 3 deletions
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index e940a5b9..675fd8c2 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -8,6 +8,7 @@ my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR
my $JOBS = 15;
my $PMEM = "9G";
+my $NUM_TRANSLATIONS = 30;
# featurize_grammar may add multiple features from a single feature extractor
# the key in this map is the extractor name, the value is a list of the extracted features
@@ -132,6 +133,7 @@ if (GetOptions(
"oov-grammar=s" => \$oovgram,
"data=s" => \$dataDir,
"pmem=s" => \$PMEM,
+ "n=i" => \$NUM_TRANSLATIONS,
"features=s@" => \@features,
"use-fork" => \$usefork,
"jobs=i" => \$JOBS,
@@ -279,7 +281,7 @@ sub filter {
safesystem($out1, $cmd) or die "Filtering failed.";
$cmd = "gunzip -c $out1 | $FEATURIZE $FEATURIZER_OPTS -g $out1 -c $CORPUS | gzip > $out2";
safesystem($out2, $cmd) or die "Featurizing failed";
- $cmd = "$FILTERBYF $out2 $outgrammar";
+ $cmd = "$FILTERBYF $NUM_TRANSLATIONS $out2 $outgrammar";
safesystem($outgrammar, $cmd) or die "Secondary filtering failed";
}
return $outgrammar;
@@ -317,11 +319,12 @@ EOT
sub print_help {
print STDERR<<EOT;
-Usage: $0 [-c data-config-file] language-pair grammar.bidir.gz [OPTIONS]
+Usage: $0 [-c data-config-file] [-n N] language-pair grammar.bidir.gz [OPTIONS]
Given an induced grammar for an entire corpus (i.e., generated by
local-gi-pipeline.pl), filter and featurize it for a dev and test set,
-run MERT, report scores.
+run MERT, report scores. Use -n to specify the number of translations
+to keep for a given source (30 is default).
EOT
}