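Updated pipeline defaults: raised $NUM_SAMPLES from 100 to 1000 in local-gi-pipeline.pl, and made train-contexts write the default-topics file only when the "default-topics-out" option is supplied.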

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@121 ec762483-ff6d-05da-a07a-a48fb63a330f
author: philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-02 20:36:27 +0000
committer: philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-02 20:36:27 +0000
commit: 1e55e4610d29af87860e38e461840f9e628bac2e (patch)
tree: f819b92081c541153bec91f97015277f6501a26d /gi
parent: 445d640d67a10b79c1dd97ce17f5786bfd599176 (diff)
2 files changed, 11 insertions, 9 deletions
diff --git a/gi/pipeline/local-gi-pipeline.pl b/gi/pipeline/local-gi-pipeline.pl
index 17f39a62..7c31be3b 100755
--- a/gi/pipeline/local-gi-pipeline.pl
+++ b/gi/pipeline/local-gi-pipeline.pl
@@ -10,7 +10,7 @@ my $BASE_PHRASE_MAX_SIZE = 10;
 my $COMPLETE_CACHE = 1;
 my $ITEMS_IN_MEMORY = 10000000;  # cache size in extractors
 my $NUM_TOPICS = 50;
-my $NUM_SAMPLES = 100;
+my $NUM_SAMPLES = 1000;
 my $CONTEXT_SIZE = 1;
 my $BIDIR = 1;
 
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index de107c86..7e2100f8 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -96,7 +96,7 @@ int main(int argc, char **argv)
     ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
 
     int document_id=0;
-   map<int,int> all_terms;
+    map<int,int> all_terms;
     for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); 
          corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
       vector<int> unique_terms;
@@ -125,13 +125,15 @@ int main(int argc, char **argv)
     }
     documents_out.close();
 
-   ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
-    default_topics << model.max_topic() <<endl;
-    for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
-     vector<std::string> strings = contexts_corpus.context2string(termIt->first);
-      default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
-     copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
-      default_topics <<endl;
+    if (vm.count("default-topics-out")) {
+      ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
+      default_topics << model.max_topic() <<endl;
+      for (std::map<int,int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
+       vector<std::string> strings = contexts_corpus.context2string(termIt->first);
+        default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| ";
+       copy(strings.begin(), strings.end(),ostream_iterator<std::string>(default_topics, " "));
+        default_topics <<endl;
+      }
     }
   }
author	philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-02 20:36:27 +0000
committer	philblunsom <philblunsom@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-02 20:36:27 +0000
commit	1e55e4610d29af87860e38e461840f9e628bac2e (patch)
tree	f819b92081c541153bec91f97015277f6501a26d /gi
parent	445d640d67a10b79c1dd97ce17f5786bfd599176 (diff)