summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-13 20:09:22 +0000
committer trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-13 20:09:22 +0000
commit 19dc80f89a047b09010187f2306c85a16b25f575 (patch)
tree 7c9620ebab5ed1fb473ce7cc4d2de941cbfc2ce9
parent 4eb281af2a9a8538c934860456660ff792b44664 (diff)
Updated PR command line and output directory. Override for experiment directory.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@240 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x gi/pipeline/evaluation-pipeline.pl 1
-rwxr-xr-x gi/pipeline/local-gi-pipeline.pl 15
l--------- gi/posterior-regularisation/prjava.jar 2
3 files changed, 12 insertions, 6 deletions
diff --git a/gi/pipeline/evaluation-pipeline.pl b/gi/pipeline/evaluation-pipeline.pl
index f5ad7b14..6e786a8a 100755
--- a/gi/pipeline/evaluation-pipeline.pl
+++ b/gi/pipeline/evaluation-pipeline.pl
@@ -110,6 +110,7 @@ if (GetOptions(
"backoff_grammar" => \$bkoffgram,
"data=s" => \$dataDir,
"features=s@" => \@features,
+ "out-dir=s" => \$outdir,
) == 0 || @ARGV!=2 || $help) {
print_help();
exit;
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 259dcd9c..bcf9c9be 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -20,9 +20,10 @@ my $TOPICS_CONFIG = "pyp-topics.conf";
my $MODEL = "pyp";
my $NUM_EM_ITERS = 100;
my $NUM_PR_ITERS = 0;
-my $PR_SCALE_P = 10;
+my $PR_SCALE_P = 1;
my $PR_SCALE_C = 0;
my $PR_THREADS = 0;
+my $AGREE;
my $EXTOOLS = "$SCRIPT_DIR/../../extools";
die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
@@ -63,6 +64,7 @@ usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE,
'pr-scale-phrase=f' => \$PR_SCALE_P,
'pr-scale-context=f' => \$PR_SCALE_C,
'pr-threads=i' => \$PR_THREADS,
+ 'pr-agree' => \$AGREE,
'tagged_corpus=s' => \$TAGGED_CORPUS,
);
@@ -132,10 +134,12 @@ sub cluster_dir {
if (lc($MODEL) eq "pyp") {
return context_dir() . ".PYP.t$NUM_TOPICS.s$NUM_SAMPLES";
} elsif (lc($MODEL) eq "prem") {
- if ($NUM_PR_ITERS == 0) {
- return context_dir() . ".PREM.t$NUM_TOPICS.ie$NUM_EM_ITERS.ip$NUM_PR_ITERS";
+ if (defined($AGREE)) {
+ return context_dir() . ".AGREE.t$NUM_TOPICS.i$NUM_EM_ITERS";
+ } elsif ($NUM_PR_ITERS == 0) {
+ return context_dir() . ".EM.t$NUM_TOPICS.i$NUM_EM_ITERS";
} else {
- return context_dir() . ".PREM.t$NUM_TOPICS.ie$NUM_EM_ITERS.ip$NUM_PR_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C";
+ return context_dir() . ".PR.t$NUM_TOPICS.ie$NUM_EM_ITERS.ip$NUM_PR_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C";
}
}
}
@@ -205,7 +209,8 @@ sub prem_train {
if (-e $OUT_CLUSTERS) {
print STDERR "$OUT_CLUSTERS exists, reusing...\n";
} else {
- safesystem("$PREM_TRAIN --in $IN_CONTEXTS --topics $NUM_TOPICS --out $OUT_CLUSTERS --em $NUM_EM_ITERS --pr $NUM_PR_ITERS --scale-phrase $PR_SCALE_P --scale-context $PR_SCALE_C --threads $PR_THREADS") or die "Topic training failed.\n";
+ my $agree = ($AGREE) ? "--agree" : "";
+ safesystem("$PREM_TRAIN --in $IN_CONTEXTS --topics $NUM_TOPICS --out $OUT_CLUSTERS --em $NUM_EM_ITERS --pr $NUM_PR_ITERS --scale-phrase $PR_SCALE_P --scale-context $PR_SCALE_C --threads $PR_THREADS $agree") or die "Topic training failed.\n";
}
}
diff --git a/gi/posterior-regularisation/prjava.jar b/gi/posterior-regularisation/prjava.jar
index 7cd1a3ff..da8bf761 120000
--- a/gi/posterior-regularisation/prjava.jar
+++ b/gi/posterior-regularisation/prjava.jar
@@ -1 +1 @@
-prjava/prjava-20100707.jar
\ No newline at end of file
+prjava/prjava-20100708.jar
\ No newline at end of file