diff options
author | philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-02 20:36:27 +0000 |
---|---|---|
committer | philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-02 20:36:27 +0000 |
commit | 1e55e4610d29af87860e38e461840f9e628bac2e (patch) | |
tree | f819b92081c541153bec91f97015277f6501a26d /gi | |
parent | 445d640d67a10b79c1dd97ce17f5786bfd599176 (diff) |
Updated pipeline defaults.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@121 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi')
-rwxr-xr-x | gi/pipeline/local-gi-pipeline.pl | 2 | ||||
-rw-r--r-- | gi/pyp-topics/src/train-contexts.cc | 18 |
2 files changed, 11 insertions, 9 deletions
```diff
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 17f39a62..7c31be3b 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -10,7 +10,7 @@ my $BASE_PHRASE_MAX_SIZE = 10;
 my $COMPLETE_CACHE = 1;
 my $ITEMS_IN_MEMORY = 10000000; # cache size in extractors
 my $NUM_TOPICS = 50;
-my $NUM_SAMPLES = 100;
+my $NUM_SAMPLES = 1000;
 my $CONTEXT_SIZE = 1;
 my $BIDIR = 1;
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index de107c86..7e2100f8 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
   ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
   int document_id=0;
-  map<int,int> all_terms;
+  map<int,int> all_terms;
   for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
     vector<int> unique_terms;
@@ -125,13 +125,15 @@ int main(int argc, char **argv)
   }
   documents_out.close();
-  ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
-  default_topics << model.max_topic() <<endl;
-  for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
-    vector<std::string> strings = contexts_corpus.context2string(termIt->first);
-    default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
-    copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
-    default_topics <<endl;
+  if (vm.count("default-topics-out")) {
+    ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
+    default_topics << model.max_topic() <<endl;
+    for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
+      vector<std::string> strings = contexts_corpus.context2string(termIt->first);
+      default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
+      copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
+      default_topics <<endl;
+    }
   }
 }
```