summary refs log tree commit diff
path: root/gi/pipeline
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline')
-rwxr-xr-x gi/pipeline/local-gi-pipeline.pl 8
1 files changed, 5 insertions, 3 deletions
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 6199e4c9..af83beb8 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -52,7 +52,7 @@ my $CORPUS = $ARGV[0];
open F, "<$CORPUS" or die "Can't read $CORPUS: $!"; close F;
extract_context();
-contexts_to_documents();
+# contexts_to_documents();
topic_train();
label_spans_with_topics();
my $res;
@@ -118,12 +118,14 @@ sub contexts_to_documents {
sub topic_train {
print STDERR "\n!!!TRAIN PYP TOPICS\n";
- my $IN_DOCS = "$OUTPUT/ctx.num.gz";
+# my $IN_DOCS = "$OUTPUT/ctx.num.gz";
+ my $IN_CONTEXTS = "$OUTPUT/context.txt.gz";
my $OUT_CLUSTERS = "$OUTPUT/docs.txt.gz";
if (-e $OUT_CLUSTERS) {
print STDERR "$OUT_CLUSTERS exists, reusing...\n";
} else {
- safesystem("$TOPIC_TRAIN -d $IN_DOCS -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n";
+ safesystem("$TOPIC_TRAIN --contexts $IN_CONTEXTS --backoff-type simple -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n";
+# safesystem("$TOPIC_TRAIN -d $IN_DOCS -t $NUM_TOPICS -s $NUM_SAMPLES -o $OUT_CLUSTERS -w /dev/null") or die "Topic training failed.\n";
}
}