summary | refs | log | tree | commit | diff
path: root/gi/pyp-topics/src/train-contexts.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pyp-topics/src/train-contexts.cc')
-rw-r--r-- gi/pyp-topics/src/train-contexts.cc 4
1 file changed, 3 insertions, 1 deletion
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index 0a48d3d9..5e98d02f 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -54,6 +54,7 @@ int main(int argc, char **argv)
("freq-cutoff-end", value<int>()->default_value(0), "final frequency cutoff.")
("freq-cutoff-interval", value<int>()->default_value(0), "number of iterations between frequency decrement.")
("max-threads", value<int>()->default_value(1), "maximum number of simultaneous threads allowed")
+ ("max-contexts-per-document", value<int>()->default_value(0), "Only sample the n most frequent contexts for a document.")
("num-jobs", value<int>()->default_value(1), "allows finer control over parallelization")
;
@@ -110,7 +111,8 @@ int main(int argc, char **argv)
model.sample_corpus(contexts_corpus, vm["samples"].as<int>(),
vm["freq-cutoff-start"].as<int>(),
vm["freq-cutoff-end"].as<int>(),
- vm["freq-cutoff-interval"].as<int>());
+ vm["freq-cutoff-interval"].as<int>(),
+ vm["max-contexts-per-document"].as<int>());
if (vm.count("document-topics-out")) {
ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());