From 4af669e58b6da1db14f2a7c56677ab8b0f3cb6b5 Mon Sep 17 00:00:00 2001 From: "philblunsom@gmail.com" Date: Fri, 2 Jul 2010 15:11:30 +0000 Subject: git-svn-id: https://ws10smt.googlecode.com/svn/trunk@109 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pyp-topics/src/train-contexts.cc | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'gi/pyp-topics/src') diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc index d7262cdc..110fb9d5 100644 --- a/gi/pyp-topics/src/train-contexts.cc +++ b/gi/pyp-topics/src/train-contexts.cc @@ -25,8 +25,8 @@ using namespace std; int main(int argc, char **argv) { - std::cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n"; - std::cout << REVISION << '\n' << std::endl; + cout << "Pitman Yor topic models: Copyright 2010 Phil Blunsom\n"; + cout << REVISION << '\n' <().c_str()); int document_id=0; - std::set all_terms; + map all_terms; for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) { - std::vector unique_terms; + vector unique_terms; for (Document::const_iterator docIt=corpusIt->begin(); docIt != corpusIt->end(); ++docIt) { if (unique_terms.empty() || *docIt != unique_terms.back()) @@ -110,23 +110,25 @@ int main(int argc, char **argv) termIt != unique_terms.end(); ++termIt) { if (termIt != unique_terms.begin()) documents_out << " ||| "; - std::vector strings = contexts_corpus.context2string(*termIt); - std::copy(strings.begin(), strings.end(), std::ostream_iterator(documents_out, " ")); + vector strings = contexts_corpus.context2string(*termIt); + copy(strings.begin(), strings.end(),ostream_iterator(documents_out, " ")); documents_out << "||| C=" << model.max(document_id, *termIt); - all_terms.insert(*termIt); + // increment this terms frequency + pair::iterator,bool> insert_result = all_terms.insert(make_pair(*termIt,1)); + if (!insert_result.second) insert_result.first++; } - documents_out << std::endl; + documents_out <().c_str()); - default_topics << model.max_topic() << std::endl; - for (std::set::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) { - std::vector strings = contexts_corpus.context2string(*termIt); - default_topics << model.max(-1, *termIt) << " ||| "; - std::copy(strings.begin(), strings.end(), std::ostream_iterator(default_topics, " ")); - default_topics << std::endl; + ofstream default_topics(vm["default-topics-out"].as().c_str()); + default_topics << model.max_topic() <::const_iterator termIt=all_terms.begin(); termIt != all_terms.end(); ++termIt) { + vector strings = contexts_corpus.context2string(termIt->first); + default_topics << model.max(-1, termIt->first) << " ||| " << termIt->second << " ||| "; + copy(strings.begin(), strings.end(),ostream_iterator(default_topics, " ")); + default_topics <