diff options
Diffstat (limited to 'gi/pipeline/evaluation-pipeline.pl')
-rwxr-xr-x | gi/pipeline/evaluation-pipeline.pl | 9 |
1 file changed, 6 insertions, 3 deletions
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index e940a5b9..675fd8c2 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -8,6 +8,7 @@ my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR
 
 my $JOBS = 15;
 my $PMEM = "9G";
+my $NUM_TRANSLATIONS = 30;
 
 # featurize_grammar may add multiple features from a single feature extractor
 # the key in this map is the extractor name, the value is a list of the extracted features
@@ -132,6 +133,7 @@ if (GetOptions(
  "oov-grammar=s" => \$oovgram,
  "data=s" => \$dataDir,
  "pmem=s" => \$PMEM,
+ "n=i" => \$NUM_TRANSLATIONS,
  "features=s@" => \@features,
  "use-fork" => \$usefork,
  "jobs=i" => \$JOBS,
@@ -279,7 +281,7 @@ sub filter {
  safesystem($out1, $cmd) or die "Filtering failed.";
  $cmd = "gunzip -c $out1 | $FEATURIZE $FEATURIZER_OPTS -g $out1 -c $CORPUS | gzip > $out2";
  safesystem($out2, $cmd) or die "Featurizing failed";
- $cmd = "$FILTERBYF $out2 $outgrammar";
+ $cmd = "$FILTERBYF $NUM_TRANSLATIONS $out2 $outgrammar";
  safesystem($outgrammar, $cmd) or die "Secondary filtering failed";
  }
  return $outgrammar;
@@ -317,11 +319,12 @@ EOT
 
 sub print_help {
  print STDERR<<EOT;
-Usage: $0 [-c data-config-file] language-pair grammar.bidir.gz [OPTIONS]
+Usage: $0 [-c data-config-file] [-n N] language-pair grammar.bidir.gz [OPTIONS]
 
 Given an induced grammar for an entire corpus (i.e., generated by
 local-gi-pipeline.pl), filter and featurize it for a dev and test set,
-run MERT, report scores.
+run MERT, report scores. Use -n to specify the number of translations
+to keep for a given source (30 is default).
 
 EOT
 }