diff options
-rwxr-xr-x | gi/pipeline/evaluation-pipeline.pl | 12 | ||||
-rw-r--r-- | gi/pipeline/valhalla.config | 8 |
2 files changed, 19 insertions, 1 deletion
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl index 8ee41122..c6dcca05 100755 --- a/gi/pipeline/evaluation-pipeline.pl +++ b/gi/pipeline/evaluation-pipeline.pl @@ -70,6 +70,16 @@ assert_exec($CDEC, $PARALLELIZE, $FILTER, $FEATURIZE, $DISTVEST); my $numtopics = 25; my $config = "$SCRIPT_DIR/clsp.config"; +if ((scalar @ARGV) >= 2 && ($ARGV[0] eq '-c')) { + $config = $ARGV[1]; + shift @ARGV; shift @ARGV; + unless (-f $config) { + $config = "$SCRIPT_DIR/$config"; + unless (-f $config) { + $config .= ".config"; + } + } +} print STDERR "CORPORA CONFIGURATION: $config\n"; open CONF, "<$config" or die "Can't read $config: $!"; my %paths; @@ -296,7 +306,7 @@ EOT sub print_help { print STDERR<<EOT; -Usage: $0 [OPTIONS] language-pair grammar.bidir.gz +Usage: $0 [-c data-config-file] language-pair grammar.bidir.gz [OPTIONS] Given an induced grammar for an entire corpus (i.e., generated by local-gi-pipeline.pl), filter and featurize it for a dev and test set, diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config new file mode 100644 index 00000000..503cbd4a --- /dev/null +++ b/gi/pipeline/valhalla.config @@ -0,0 +1,8 @@ +# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED +# name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ... +btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh +fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al +zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al +aren /home/chris/ws10smt/data/arabic-english corpus.ar-en.al +uren /home/chris/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh +nlfr /home/chris/ws10smt/data/dutch-french corpus.nl-fr.al |