diff options
| -rwxr-xr-x | gi/pipeline/evaluation-pipeline.pl | 12 | ||||
| -rw-r--r-- | gi/pipeline/valhalla.config | 8 | 
2 files changed, 19 insertions, 1 deletions
| diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl index 8ee41122..c6dcca05 100755 --- a/gi/pipeline/evaluation-pipeline.pl +++ b/gi/pipeline/evaluation-pipeline.pl @@ -70,6 +70,16 @@ assert_exec($CDEC, $PARALLELIZE, $FILTER, $FEATURIZE, $DISTVEST);  my $numtopics = 25;  my $config = "$SCRIPT_DIR/clsp.config"; +if ((scalar @ARGV) >= 2 && ($ARGV[0] eq '-c')) { +  $config = $ARGV[1]; +  shift @ARGV; shift @ARGV; +  unless (-f $config) { +    $config = "$SCRIPT_DIR/$config"; +    unless (-f $config) { +      $config .= ".config"; +    } +  } +}  print STDERR "CORPORA CONFIGURATION: $config\n";  open CONF, "<$config" or die "Can't read $config: $!";  my %paths; @@ -296,7 +306,7 @@ EOT  sub print_help {    print STDERR<<EOT; -Usage: $0 [OPTIONS] language-pair grammar.bidir.gz +Usage: $0 [-c data-config-file] language-pair grammar.bidir.gz [OPTIONS]  Given an induced grammar for an entire corpus (i.e., generated by  local-gi-pipeline.pl), filter and featurize it for a dev and test set, diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config new file mode 100644 index 00000000..503cbd4a --- /dev/null +++ b/gi/pipeline/valhalla.config @@ -0,0 +1,8 @@ +# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED +# name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ... +btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh +fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al +zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al +aren /home/chris/ws10smt/data/arabic-english corpus.ar-en.al +uren /home/chris/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh +nlfr /home/chris/ws10smt/data/dutch-french corpus.nl-fr.al | 
