summary refs log tree commit diff
path: root/gi/pipeline/evaluation-pipeline.pl
diff options
context:
space:
mode:
author bothameister <bothameister@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-23 18:03:47 +0000
committer bothameister <bothameister@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-23 18:03:47 +0000
commit 8d222e20d8f253aa2c73d139d8ae6cc69483d071 (patch)
tree f2a0fe94801dc1acf60d64c3a8af0d8e0d66163d /gi/pipeline/evaluation-pipeline.pl
parent 6c26dfd1e1cc1974c8d7840d825a77bf55464bef (diff)
Adding morphology-segmentation stuff. Changes include: local-gi-pipeline (--morf arg), eval-pipeline (--oov-grammar, --lmorder)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@382 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline/evaluation-pipeline.pl')
-rwxr-xr-x gi/pipeline/evaluation-pipeline.pl 10
1 files changed, 7 insertions, 3 deletions
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index 13fe07cf..e940a5b9 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -123,16 +123,20 @@ my $dataDir = '/export/ws10smt/data';
my @features;
my $bkoffgram;
my $gluegram;
+my $oovgram;
my $usefork;
+my $lmorder = 3;
if (GetOptions(
"backoff-grammar=s" => \$bkoffgram,
"glue-grammar=s" => \$gluegram,
+ "oov-grammar=s" => \$oovgram,
"data=s" => \$dataDir,
"pmem=s" => \$PMEM,
"features=s@" => \@features,
"use-fork" => \$usefork,
"jobs=i" => \$JOBS,
"out-dir=s" => \$outdir,
+ "lmorder=i" => \$lmorder,
) == 0 || @ARGV!=2 || $help) {
print_help();
exit;
@@ -214,7 +218,6 @@ my $testini = mydircat($outdir, "cdec-test.ini");
write_cdec_ini($testini, $testgrammar);
-
# VEST
print STDERR "\nMINIMUM ERROR TRAINING\n";
my $tuned_weights = mydircat($outdir, 'weights.tuned');
@@ -294,17 +297,18 @@ sub write_cdec_ini {
my ($filename, $grammar_path) = (@_);
open CDECINI, ">$filename" or die "Can't write $filename: $!";
my $glue = ($gluegram ? "$glue_grmr" : "$datadir/glue/glue.scfg.gz");
+ my $oov = ($oovgram ? "$oovgram" : "$datadir/oov.scfg.gz");
print CDECINI <<EOT;
formalism=scfg
cubepruning_pop_limit=100
add_pass_through_rules=true
scfg_extra_glue_grammar=$glue
-grammar=$datadir/oov.scfg.gz
+grammar=$oov
grammar=$grammar_path
scfg_default_nt=OOV
scfg_no_hiero_glue_grammar=true
feature_function=WordPenalty
-feature_function=LanguageModel -o 3 $LANG_MODEL
+feature_function=LanguageModel -o $lmorder $LANG_MODEL
EOT
print CDECINI "grammar=$bkoff_grmr\n" if $bkoffgram;
close CDECINI;