From 7d124c74f6d88c32d72a5f3de2d405eb85b050ec Mon Sep 17 00:00:00 2001 From: "philblunsom@gmail.com" Date: Fri, 2 Jul 2010 17:14:55 +0000 Subject: git-svn-id: https://ws10smt.googlecode.com/svn/trunk@115 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pyp-topics/src/pyp-topics.cc | 28 +++++++++++++++++++++++++++- gi/pyp-topics/src/pyp-topics.hh | 3 ++- gi/pyp-topics/src/train-contexts.cc | 3 ++- 3 files changed, 31 insertions(+), 3 deletions(-) (limited to 'gi') diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc index 56d49928..186267d3 100644 --- a/gi/pyp-topics/src/pyp-topics.cc +++ b/gi/pyp-topics/src/pyp-topics.cc @@ -180,6 +180,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) { << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> "; if (k % 5 == 0) std::cerr << std::endl << '\t'; } + std::cerr.precision(4); std::cerr << std::endl; } } @@ -283,7 +284,32 @@ int PYPTopics::max_topic() const { return current_topic; } -int PYPTopics::max(const DocumentId& doc, const Term& term) { +int PYPTopics::max(const DocumentId& doc) const { + //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl; + // collect probs + F current_max=0.0; + int current_topic=-1; + for (int k=0; k current_max) { + current_max = prob; + current_topic = k; + } + } + assert(current_topic >= 0); + return current_topic; +} + +int PYPTopics::max(const DocumentId& doc, const Term& term) const { //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl; // collect probs F current_max=0.0; diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh index 7e003228..c35645aa 100644 --- a/gi/pyp-topics/src/pyp-topics.hh +++ b/gi/pyp-topics/src/pyp-topics.hh @@ -21,7 +21,8 @@ public: void sample(const Corpus& corpus, int samples); int sample(const DocumentId& doc, const Term& term); - int max(const DocumentId& doc, const Term& term); + int max(const DocumentId& doc, const Term& term) const; + int max(const DocumentId& doc) const; int max_topic() const; void set_backoff(const std::string& filename) { diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc index 12e7baec..0a2f1959 100644 --- a/gi/pyp-topics/src/train-contexts.cc +++ b/gi/pyp-topics/src/train-contexts.cc @@ -99,7 +99,7 @@ int main(int argc, char **argv) map all_terms; for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) { - vector unique_terms; + vector unique_terms; for (Document::const_iterator docIt=corpusIt->begin(); docIt != corpusIt->end(); ++docIt) { if (unique_terms.empty() || *docIt != unique_terms.back()) @@ -111,6 +111,7 @@ int main(int argc, char **argv) //insert_result.first++; } documents_out << contexts_corpus.key(document_id) << '\t'; + documents_out << model.max(document_id) << " ||| "; for (std::vector::const_iterator termIt=unique_terms.begin(); termIt != unique_terms.end(); ++termIt) { if (termIt != unique_terms.begin()) -- cgit v1.2.3