From d403f643aaeb1d42658a4e1e585c87f621423a46 Mon Sep 17 00:00:00 2001 From: philblunsom Date: Tue, 29 Jun 2010 05:02:51 +0000 Subject: git-svn-id: https://ws10smt.googlecode.com/svn/trunk@51 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pipeline/local-gi-pipeline.pl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl index 6199e4c9..af83beb8 100755 --- a/gi/pipeline/local-gi-pipeline.pl +++ b/gi/pipeline/local-gi-pipeline.pl @@ -52,7 +52,7 @@ my $CORPUS = $ARGV[0]; open F, "<$CORPUS" or die "Can't read $CORPUS: $!"; close F; extract_context(); -contexts_to_documents(); +# contexts_to_documents(); topic_train(); label_spans_with_topics(); my $res; @@ -118,12 +118,14 @@ sub contexts_to_documents { sub topic_train { print STDERR "\n!!!TRAIN PYP TOPICS\n"; - my $IN_DOCS = "$OUTPUT/ctx.num.gz"; +# my $IN_DOCS = "$OUTPUT/ctx.num.gz"; + my $IN_CONTEXTS = "$OUTPUT/context.txt.gz"; my $OUT_CLUSTERS = "$OUTPUT/docs.txt.gz"; if (-e $OUT_CLUSTERS) { print STDERR "$OUT_CLUSTERS exists, reusing...\n"; } else { - safesystem("$TOPIC_TRAIN -d $IN_DOCS -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n"; + safesystem("$TOPIC_TRAIN --contexts $IN_CONTEXTS --backoff-type simple -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n"; +# safesystem("$TOPIC_TRAIN -d $IN_DOCS -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n"; } } -- cgit v1.2.3