summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gi/pipeline/clsp.config 2
-rwxr-xr-x gi/pipeline/evaluation-pipeline.pl 3
-rwxr-xr-x gi/pipeline/local-gi-pipeline.pl 5
3 files changed, 8 insertions, 2 deletions
diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config
index 49feada0..27161fab 100644
--- a/gi/pipeline/clsp.config
+++ b/gi/pipeline/clsp.config
@@ -4,6 +4,6 @@ btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1
fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al
zhen /export/ws10smt/data/chinese-english corpus.zh-en.al
aren /export/ws10smt/data/arabic-english corpus.ar-en.al
-uren /export/ws10smt/data/urdu-english corpus.ur-en.al
+uren /export/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh
nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index e0d13340..161dc170 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -100,7 +100,8 @@ if (GetOptions(
print_help();
exit;
}
-
+my @fkeys = keys %$feat_map;
+die "You must specify one or more features with -f. Known features: @fkeys\n" unless scalar @features > 0;
my @xfeats;
for my $feat (@features) {
my $rs = $feat_map->{$feat};
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 99b487db..a705af3b 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -43,8 +43,12 @@ my $TOPIC_TRAIN = "$PYPTOOLS/pyp-contexts-train";
assert_exec($SORT_KEYS, $REDUCER, $EXTRACTOR, $PYP_TOPICS_TRAIN, $S2L, $C2D, $TOPIC_TRAIN);
+my $BACKOFF_GRAMMAR;
+my $TAGGED_CORPUS;
+
my $OUTPUT = './giwork';
usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE,
+ 'backoff_grammar' => \$BACKOFF_GRAMMAR,
'output=s' => \$OUTPUT,
'model=s' => \$MODEL,
'topics=i' => \$NUM_TOPICS,
@@ -54,6 +58,7 @@ usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE,
'em-iterations=i' => \$NUM_EM_PR_ITERS,
'pr-scale=f' => \$PR_SCALE,
'pr-threads=i' => \$PR_THREADS,
+ 'tagged_corpus=s' => \$TAGGED_CORPUS,
);
usage() unless scalar @ARGV == 1;