summary | refs | log | tree | commit | diff
path: root/gi/pipeline/evaluation-pipeline.pl
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline/evaluation-pipeline.pl')
-rwxr-xr-x gi/pipeline/evaluation-pipeline.pl 27
1 files changed, 18 insertions, 9 deletions
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index 178159b9..64803fd0 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+sub m#!/usr/bin/perl -w
use strict;
use Getopt::Long;
use Cwd;
@@ -120,9 +120,11 @@ my $FEATURIZER_OPTS = '';
my $dataDir = '/export/ws10smt/data';
my @features;
my $bkoffgram;
+my $gluegram;
my $usefork;
if (GetOptions(
- "backoff_grammar" => \$bkoffgram,
+ "backoff_grammar=s" => \$bkoffgram,
+ "glue_grammar=s" => \$gluegram,
"data=s" => \$dataDir,
"features=s@" => \@features,
"use-fork" => \$usefork,
@@ -178,13 +180,21 @@ print STDERR "\nCREATING INITIAL WEIGHTS FILE: weights.init\n";
my $weights = mydircat($outdir, "weights.init");
write_random_weights_file($weights, @xfeats);
+my $bkoff_grmr;
+my $glue_grmr;
+if($bkoffgram) {
+ $bkoff_grmr = mydircat($outdir, "backoff.scfg.gz");
+ safesystem("cp $bkoffgram $bkoff_grmr");
+}
+if($gluegram) {
+ $glue_grmr = mydircat($outdir, "glue.bo.scfg.gz");
+ safesystem("cp $gluegram $glue_grmr");
+}
+
# MAKE DEV
print STDERR "\nFILTERING FOR dev...\n";
print STDERR "DEV: $dev (REFS=$drefs)\n";
my $devgrammar = filter($grammar, $dev, 'dev', $outdir);
-if($bkoffgram) {
- $devgrammar = add_backoff($devgrammar, $numtopics, 'dev', $outdir);
-}
my $devini = mydircat($outdir, "cdec-dev.ini");
write_cdec_ini($devini, $devgrammar);
@@ -194,9 +204,6 @@ print STDERR "\nFILTERING FOR test...\n";
print STDERR "TEST: $test (EVAL=$teval)\n";
`mkdir -p $outdir`;
my $testgrammar = filter($grammar, $test, 'test', $outdir);
-if($bkoffgram) {
- $testgrammar = add_backoff($testgrammar, $numtopics, 'test', $outdir);
-}
my $testini = mydircat($outdir, "cdec-test.ini");
write_cdec_ini($testini, $testgrammar);
@@ -296,11 +303,12 @@ sub mydircat {
sub write_cdec_ini {
my ($filename, $grammar_path) = (@_);
open CDECINI, ">$filename" or die "Can't write $filename: $!";
+ my $glue = ($gluegram ? "$glue_grmr" : "$datadir/glue/glue.scfg.gz");
print CDECINI <<EOT;
formalism=scfg
cubepruning_pop_limit=100
add_pass_through_rules=true
-scfg_extra_glue_grammar=$datadir/glue/glue.scfg.gz
+scfg_extra_glue_grammar=$glue
grammar=$datadir/oov.scfg.gz
grammar=$grammar_path
scfg_default_nt=OOV
@@ -308,6 +316,7 @@ scfg_no_hiero_glue_grammar=true
feature_function=WordPenalty
feature_function=LanguageModel -o 3 $LANG_MODEL
EOT
+ print CDECINI "grammar=$bkoff_grmr\n" if $bkoffgram;
close CDECINI;
};