Changed timer to be Mac-compatible.

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@103 ec762483-ff6d-05da-a07a-a48fb63a330f
author: philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-02 14:31:13 +0000
committer: philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-02 14:31:13 +0000
commit: 595dd3f0e577f522d32318acb2ad0fe288e0b00f (patch)
tree: a57c63e14000e34a4bd804b74556c22ceb37378e /gi/pyp-topics/src/train-contexts.cc
parent: 23b3ac72093b642e9151deaa4864f5f084153e1b (diff)
1 files changed, 12 insertions, 0 deletions
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index 02bb7b76..c58474da 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -40,6 +40,7 @@ int main(int argc, char **argv)
       ("data,d", value<string>(), "file containing the documents and context terms")
       ("topics,t", value<int>()->default_value(50), "number of topics")
       ("document-topics-out,o", value<string>(), "file to write the document topics to")
+      ("default-topics-out", value<string>(), "file to write default term topic assignments.")
       ("topic-words-out,w", value<string>(), "file to write the topic word distribution to")
       ("samples,s", value<int>()->default_value(10), "number of sampling passes through the data")
       ("backoff-type", value<string>(), "backoff type: none|simple")
@@ -95,6 +96,7 @@ int main(int argc, char **argv)
     ogzstream documents_out(vm["document-topics-out"].as<string>().c_str());
 
     int document_id=0;
+    std::set<int> all_terms;
     for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); 
          corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
       std::vector<int> unique_terms;
@@ -111,10 +113,20 @@ int main(int argc, char **argv)
         std::vector<std::string> strings = contexts_corpus.context2string(*termIt);
         std::copy(strings.begin(), strings.end(), std::ostream_iterator<std::string>(documents_out, " "));
         documents_out << "||| C=" << model.max(document_id, *termIt);
+
+        all_terms.insert(*termIt);
       }
       documents_out << std::endl;
     }
     documents_out.close();
+
+    std::ofstream default_topics(vm["default-topics-out"].as<string>().c_str());
+    default_topics << model.max_topic() << std::endl;
+    for (std::set<int>::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) {
+      std::vector<std::string> strings = contexts_corpus.context2string(*termIt);
+      std::copy(strings.begin(), strings.end(), std::ostream_iterator<std::string>(documents_out, " "));
+      default_topics << model.max(-1, *termIt) << std::endl;
+    }
   }
 
   if (vm.count("topic-words-out")) {
author	philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-02 14:31:13 +0000
committer	philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-02 14:31:13 +0000
commit	595dd3f0e577f522d32318acb2ad0fe288e0b00f (patch)
tree	a57c63e14000e34a4bd804b74556c22ceb37378e /gi/pyp-topics/src/train-contexts.cc
parent	23b3ac72093b642e9151deaa4864f5f084153e1b (diff)