diff options
author | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-13 20:09:22 +0000 |
---|---|---|
committer | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-13 20:09:22 +0000 |
commit | c807e0b514f21a80df0268c686c7ba70fe39611a (patch) | |
tree | 65f6f728941fe1f9b885a9ef4caae0b75ee8354b /gi/pipeline/local-gi-pipeline.pl | |
parent | c3fc0f68dd90fa1c88485c63624c9987b6e1b297 (diff) |
Updated PR command line and output directory. Override for experiment directory.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@240 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline/local-gi-pipeline.pl')
-rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 15 |
1 files changed, 10 insertions, 5 deletions
```diff
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 259dcd9c..bcf9c9be 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -20,9 +20,10 @@ my $TOPICS_CONFIG = "pyp-topics.conf";
 my $MODEL = "pyp";
 my $NUM_EM_ITERS = 100;
 my $NUM_PR_ITERS = 0;
-my $PR_SCALE_P = 10;
+my $PR_SCALE_P = 1;
 my $PR_SCALE_C = 0;
 my $PR_THREADS = 0;
+my $AGREE;
 
 my $EXTOOLS = "$SCRIPT_DIR/../../extools";
 die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;
@@ -63,6 +64,7 @@ usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE,
   'pr-scale-phrase=f' => \$PR_SCALE_P,
   'pr-scale-context=f' => \$PR_SCALE_C,
   'pr-threads=i' => \$PR_THREADS,
+  'pr-agree' => \$AGREE,
   'tagged_corpus=s' => \$TAGGED_CORPUS,
   );
 
@@ -132,10 +134,12 @@ sub cluster_dir {
   if (lc($MODEL) eq "pyp") {
     return context_dir() . ".PYP.t$NUM_TOPICS.s$NUM_SAMPLES";
   } elsif (lc($MODEL) eq "prem") {
-    if ($NUM_PR_ITERS == 0) {
-      return context_dir() . ".PREM.t$NUM_TOPICS.ie$NUM_EM_ITERS.ip$NUM_PR_ITERS";
+    if (defined($AGREE)) {
+      return context_dir() . ".AGREE.t$NUM_TOPICS.i$NUM_EM_ITERS";
+    } elsif ($NUM_PR_ITERS == 0) {
+      return context_dir() . ".EM.t$NUM_TOPICS.i$NUM_EM_ITERS";
     } else {
-      return context_dir() . ".PREM.t$NUM_TOPICS.ie$NUM_EM_ITERS.ip$NUM_PR_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C";
+      return context_dir() . ".PR.t$NUM_TOPICS.ie$NUM_EM_ITERS.ip$NUM_PR_ITERS.sp$PR_SCALE_P.sc$PR_SCALE_C";
     }
   }
 }
@@ -205,7 +209,8 @@ sub prem_train {
   if (-e $OUT_CLUSTERS) {
     print STDERR "$OUT_CLUSTERS exists, reusing...\n";
   } else {
-    safesystem("$PREM_TRAIN --in $IN_CONTEXTS --topics $NUM_TOPICS --out $OUT_CLUSTERS --em $NUM_EM_ITERS --pr $NUM_PR_ITERS --scale-phrase $PR_SCALE_P --scale-context $PR_SCALE_C --threads $PR_THREADS") or die "Topic training failed.\n";
+    my $agree = ($AGREE) ? "--agree" : "";
+    safesystem("$PREM_TRAIN --in $IN_CONTEXTS --topics $NUM_TOPICS --out $OUT_CLUSTERS --em $NUM_EM_ITERS --pr $NUM_PR_ITERS --scale-phrase $PR_SCALE_P --scale-context $PR_SCALE_C --threads $PR_THREADS $agree") or die "Topic training failed.\n";
   }
 }
 
```