diff options
| author | philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-02 20:07:37 +0000 | 
|---|---|---|
| committer | philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-02 20:07:37 +0000 | 
| commit | 755b9e189ed9d07f42816937466ec89e5b977c6e (patch) | |
| tree | 2afa9d2b83e9443d83a65372331e8e61abfb2f87 /gi/pipeline | |
| parent | 36b0eac74f5d8f8674659826a72276b47d687bd6 (diff) | |
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@119 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline')
| -rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 12 | 
1 files changed, 10 insertions, 2 deletions
| diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl index acd6b94c..17f39a62 100755 --- a/gi/pipeline/local-gi-pipeline.pl +++ b/gi/pipeline/local-gi-pipeline.pl @@ -14,6 +14,9 @@ my $NUM_SAMPLES = 100;  my $CONTEXT_SIZE = 1;  my $BIDIR = 1; +my $HIERARCHICAL_TOPICS = 0; +my $FILTER_SINGLETONS = 0; +  my $EXTOOLS = "$SCRIPT_DIR/../../extools";  die "Can't find extools: $EXTOOLS" unless -e $EXTOOLS && -d $EXTOOLS;  my $PYPTOOLS = "$SCRIPT_DIR/../pyp-topics/src"; @@ -39,6 +42,8 @@ usage() unless &GetOptions('base_phrase_max_size=i' => \$BASE_PHRASE_MAX_SIZE,                             'topics=i' => \$NUM_TOPICS,                             'trg_context=i' => \$CONTEXT_SIZE,                             'samples=i' => \$NUM_SAMPLES, +                           'hierarchical-topics' => \$HIERARCHICAL_TOPICS, +                           'filter-singletons' => \$FILTER_SINGLETONS,                            );  usage() unless scalar @ARGV == 1; @@ -130,8 +135,11 @@ sub topic_train {    if (-e $OUT_CLUSTERS) {      print STDERR "$OUT_CLUSTERS exists, reusing...\n";    } else { -    safesystem("$TOPIC_TRAIN --data $IN_CONTEXTS --backoff-type simple -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n"; -#   safesystem("$TOPIC_TRAIN -d $IN_DOCS -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n"; +    my $FILTER_SINGLETONS_ARG = ""; +    $FILTER_SINGLETONS_ARG = "--filter-singleton-contexts" if $FILTER_SINGLETONS; +    my $HIERARCHICAL_TOPICS_ARG = ""; +    $HIERARCHICAL_TOPICS_ARG = "--hierarchical-topics" if $HIERARCHICAL_TOPICS; +    safesystem("$TOPIC_TRAIN --data $IN_CONTEXTS --backoff-type simple -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS $HIERARCHICAL_TOPICS_ARG $FILTER_SINGLETONS_ARG -w /dev/null") or die "Topic training failed.\n";    }  } | 
