more support for other clusters

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@307 ec762483-ff6d-05da-a07a-a48fb63a330f
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-17 18:17:34 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-17 18:17:34 +0000
commit: d24aa658436cbae3404146c106b0b7569eac60ed (patch)
tree: 047c40e0b8958726570dcccb64fbf562f94b04a4 /gi/pipeline
parent: a2d3ef53c27bfaf4a0e9b638d8bfe24d1114fe9c (diff)
3 files changed, 14 insertions, 4 deletions
diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config
index 27161fab..f7f131a0 100644
--- a/gi/pipeline/clsp.config
+++ b/gi/pipeline/clsp.config
@@ -1,5 +1,6 @@
 # THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
 # name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
+/export/ws10smt/data
 btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
 fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al
 zhen /export/ws10smt/data/chinese-english corpus.zh-en.al
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index c6dcca05..178159b9 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -6,6 +6,8 @@ my $CWD = getcwd;
 
 my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
 
+my $JOBS = 15;
+
 # featurize_grammar may add multiple features from a single feature extractor
 # the key in this map is the extractor name, the value is a list of the extracted features
 my $feat_map = {
@@ -89,6 +91,7 @@ my %devs;
 my %devrefs;
 my %tests;
 my %testevals;
+my $datadir;
 print STDERR "       LANGUAGE PAIRS:";
 while(<CONF>) {
   chomp;
@@ -96,6 +99,7 @@ while(<CONF>) {
   next if /^\s*$/;
   s/^\s+//;
   s/\s+$//;
+  if (! defined $datadir) { $datadir = $_; next; }
   my ($name, $path, $corpus, $lm, $dev, $devref, @xtests) = split /\s+/;
   $paths{$name} = $path;
   $corpora{$name} = $corpus;
@@ -116,15 +120,19 @@ my $FEATURIZER_OPTS = '';
 my $dataDir = '/export/ws10smt/data';
 my @features;
 my $bkoffgram;
+my $usefork;
 if (GetOptions(
         "backoff_grammar" => \$bkoffgram,
         "data=s" => \$dataDir,
         "features=s@" => \@features,
+        "use-fork" => \$usefork,
+        "jobs=i" => \$JOBS,
         "out-dir=s" => \$outdir,
 ) == 0 || @ARGV!=2 || $help) {
         print_help();
         exit;
 }
+if ($usefork) { $usefork="--use-fork"; } else { $usefork = ''; }
 my @fkeys = keys %$feat_map;
 push(@features, "BackoffRule") if $bkoffgram;
 die "You must specify one or more features with -f. Known features: @fkeys\n" unless scalar @features > 0;
@@ -200,7 +208,7 @@ my $tuned_weights = mydircat($outdir, 'weights.tuned');
 if (-f $tuned_weights) {
   print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n";
 } else {
-  my $cmd = "$DISTVEST --ref-files=$drefs --source-file=$dev --weights $weights $devini";
+  my $cmd = "$DISTVEST $usefork --decode-nodes $JOBS --ref-files=$drefs --source-file=$dev --weights $weights $devini";
   print STDERR "MERT COMMAND: $cmd\n";
   `rm -rf $outdir/vest 2> /dev/null`;
   chdir $outdir or die "Can't chdir to $outdir: $!";
@@ -216,7 +224,7 @@ if (-f $tuned_weights) {
 print STDERR "\nDECODE TEST SET\n";
 my $decolog = mydircat($outdir, "test-decode.log");
 my $testtrans = mydircat($outdir, "test.trans");
-my $cmd = "cat $test | $PARALLELIZE -j 20 -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans";
+my $cmd = "cat $test | $PARALLELIZE $usefork -j $JOBS -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans";
 safesystem($testtrans, $cmd) or die "Failed to decode test set!";
 
 
@@ -292,8 +300,8 @@ sub write_cdec_ini {
 formalism=scfg
 cubepruning_pop_limit=100
 add_pass_through_rules=true
-scfg_extra_glue_grammar=/export/ws10smt/data/glue/glue.scfg.gz
-grammar=/export/ws10smt/data/oov.scfg.gz
+scfg_extra_glue_grammar=$datadir/glue/glue.scfg.gz
+grammar=$datadir/oov.scfg.gz
 grammar=$grammar_path
 scfg_default_nt=OOV
 scfg_no_hiero_glue_grammar=true
diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config
index 503cbd4a..e00a8485 100644
--- a/gi/pipeline/valhalla.config
+++ b/gi/pipeline/valhalla.config
@@ -1,5 +1,6 @@
 # THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
 # name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
+/home/chris/ws10smt/data
 btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
 fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al
 zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al
author	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-17 18:17:34 +0000
committer	redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-17 18:17:34 +0000
commit	d24aa658436cbae3404146c106b0b7569eac60ed (patch)
tree	047c40e0b8958726570dcccb64fbf562f94b04a4 /gi/pipeline
parent	a2d3ef53c27bfaf4a0e9b638d8bfe24d1114fe9c (diff)