From 5c1cbf1d5e6dcdf14984697e85c803f1dd549515 Mon Sep 17 00:00:00 2001
From: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Sat, 17 Jul 2010 18:17:34 +0000
Subject: more support for other clusters

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@307 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 gi/pipeline/clsp.config            |  1 +
 gi/pipeline/evaluation-pipeline.pl | 16 ++++++++++++----
 gi/pipeline/valhalla.config        |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'gi')
diff --git a/gi/pipeline/clsp.config b/gi/pipeline/clsp.config
index 27161fab..f7f131a0 100644
--- a/gi/pipeline/clsp.config
+++ b/gi/pipeline/clsp.config
@@ -1,5 +1,6 @@
 # THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
 # name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ...
+/export/ws10smt/data
 btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
 fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al
 zhen /export/ws10smt/data/chinese-english corpus.zh-en.al
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index c6dcca05..178159b9 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -6,6 +6,8 @@ my $CWD = getcwd;
 
 my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
 
+my $JOBS = 15;
+
 # featurize_grammar may add multiple features from a single feature extractor
 # the key in this map is the extractor name, the value is a list of the extracted features
 my $feat_map = {
@@ -89,6 +91,7 @@ my %devs;
 my %devrefs;
 my %tests;
 my %testevals;
+my $datadir;
 print STDERR "       LANGUAGE PAIRS:";
 while(<CONF>) {
   chomp;
@@ -96,6 +99,7 @@ while(<CONF>) {
   next if /^\s*$/;
   s/^\s+//;
   s/\s+$//;
+  if (! defined $datadir) { $datadir = $_; next; }
   my ($name, $path, $corpus, $lm, $dev, $devref, @xtests) = split /\s+/;
   $paths{$name} = $path;
   $corpora{$name} = $corpus;
@@ -116,15 +120,19 @@ my $FEATURIZER_OPTS = '';
 my $dataDir = '/export/ws10smt/data';
 my @features;
 my $bkoffgram;
+my $usefork;
 if (GetOptions(
         "backoff_grammar" => \$bkoffgram,
         "data=s" => \$dataDir,
         "features=s@" => \@features,
+        "use-fork" => \$usefork,
+        "jobs=i" => \$JOBS,
         "out-dir=s" => \$outdir,
 ) == 0 || @ARGV!=2 || $help) {
         print_help();
         exit;
 }
+if ($usefork) { $usefork="--use-fork"; } else { $usefork = ''; }
 my @fkeys = keys %$feat_map;
 push(@features, "BackoffRule") if $bkoffgram;
 die "You must specify one or more features with -f. Known features: @fkeys\n" unless scalar @features > 0;
@@ -200,7 +208,7 @@ my $tuned_weights = mydircat($outdir, 'weights.tuned');
 if (-f $tuned_weights) {
   print STDERR "TUNED WEIGHTS $tuned_weights EXISTS: REUSING\n";
 } else {
-  my $cmd = "$DISTVEST --ref-files=$drefs --source-file=$dev --weights $weights $devini";
+  my $cmd = "$DISTVEST $usefork --decode-nodes $JOBS --ref-files=$drefs --source-file=$dev --weights $weights $devini";
   print STDERR "MERT COMMAND: $cmd\n";
   `rm -rf $outdir/vest 2> /dev/null`;
   chdir $outdir or die "Can't chdir to $outdir: $!";
@@ -216,7 +224,7 @@ if (-f $tuned_weights) {
 print STDERR "\nDECODE TEST SET\n";
 my $decolog = mydircat($outdir, "test-decode.log");
 my $testtrans = mydircat($outdir, "test.trans");
-my $cmd = "cat $test | $PARALLELIZE -j 20 -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans";
+my $cmd = "cat $test | $PARALLELIZE $usefork -j $JOBS -e $decolog -- $CDEC -c $testini -w $tuned_weights > $testtrans";
 safesystem($testtrans, $cmd) or die "Failed to decode test set!";
 
 
@@ -292,8 +300,8 @@ sub write_cdec_ini {
 formalism=scfg
 cubepruning_pop_limit=100
 add_pass_through_rules=true
-scfg_extra_glue_grammar=/export/ws10smt/data/glue/glue.scfg.gz
-grammar=/export/ws10smt/data/oov.scfg.gz
+scfg_extra_glue_grammar=$datadir/glue/glue.scfg.gz
+grammar=$datadir/oov.scfg.gz
 grammar=$grammar_path
 scfg_default_nt=OOV
 scfg_no_hiero_glue_grammar=true
diff --git a/gi/pipeline/valhalla.config b/gi/pipeline/valhalla.config
index 503cbd4a..e00a8485 100644
--- a/gi/pipeline/valhalla.config
+++ b/gi/pipeline/valhalla.config
@@ -1,5 +1,6 @@
 # THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
 # name path aligned-corpus LM dev dev-refs test1 testt-eval.sh ...
+/home/chris/ws10smt/data
 btec /home/chris/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
 fbis /home/chris/ws10smt/data/chinese-english.fbis corpus.zh-en.al
 zhen /home/chris/ws10smt/data/chinese-english corpus.zh-en.al
-- 
cgit v1.2.3