From c0d43ad4fd094b5eeac37fc3e79d806aa928dc71 Mon Sep 17 00:00:00 2001
From: "philblunsom@gmail.com"
 <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Fri, 2 Jul 2010 17:14:55 +0000
Subject: git-svn-id: https://ws10smt.googlecode.com/svn/trunk@115
 ec762483-ff6d-05da-a07a-a48fb63a330f

---
 gi/pyp-topics/src/pyp-topics.cc     | 28 +++++++++++++++++++++++++++-
 gi/pyp-topics/src/pyp-topics.hh     |  3 ++-
 gi/pyp-topics/src/train-contexts.cc |  3 ++-
 3 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'gi/pyp-topics/src')

diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc
index 56d49928..186267d3 100644
--- a/gi/pyp-topics/src/pyp-topics.cc
+++ b/gi/pyp-topics/src/pyp-topics.cc
@@ -180,6 +180,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) {
           << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";
         if (k % 5 == 0) std::cerr << std::endl << '\t';
       }
+      std::cerr.precision(4);
       std::cerr << std::endl;
     }
   }
@@ -283,7 +284,32 @@ int PYPTopics::max_topic() const {
   return current_topic;
 }
 
-int PYPTopics::max(const DocumentId& doc, const Term& term) {
+int PYPTopics::max(const DocumentId& doc) const {
+  // Term-independent overload: returns the topic k in [0, m_num_topics) with the
+  // largest probability for `doc`; a negative `doc` scores by topic_prob alone.
+  F current_max=0.0;
+  int current_topic=-1;  // stays -1 until some topic scores strictly above 0
+  for (int k=0; k<m_num_topics; ++k) {
+    // No term likelihood is applied here; only the doc/topic levels are used.
+
+    F topic_prob = m_topic_p0;  // used as-is when m_use_topic_pyp is false
+    if (m_use_topic_pyp) // when enabled, take it from m_topic_pyp instead
+      topic_prob = m_topic_pyp.prob(k, m_topic_p0);
+
+    F prob = 0;
+    if (doc < 0) prob = topic_prob;
+    else         prob = m_document_pyps[doc].prob(k, topic_prob);  // doc is not range-checked
+
+    if (prob > current_max) {  // strict '>' keeps the lowest k on ties
+      current_max = prob;
+      current_topic = k;
+    }
+  }
+  assert(current_topic >= 0);  // fires if no topic had prob > 0 (incl. m_num_topics == 0)
+  return current_topic;
+}
+
+int PYPTopics::max(const DocumentId& doc, const Term& term) const {
   //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;
   // collect probs
   F current_max=0.0;
diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh
index 7e003228..c35645aa 100644
--- a/gi/pyp-topics/src/pyp-topics.hh
+++ b/gi/pyp-topics/src/pyp-topics.hh
@@ -21,7 +21,8 @@ public:
 
   void sample(const Corpus& corpus, int samples);
   int sample(const DocumentId& doc, const Term& term);
-  int max(const DocumentId& doc, const Term& term);
+  int max(const DocumentId& doc, const Term& term) const;
+  int max(const DocumentId& doc) const;
   int max_topic() const;
 
   void set_backoff(const std::string& filename) {
diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc
index 12e7baec..0a2f1959 100644
--- a/gi/pyp-topics/src/train-contexts.cc
+++ b/gi/pyp-topics/src/train-contexts.cc
@@ -99,7 +99,7 @@ int main(int argc, char **argv)
    map<int,int> all_terms;
     for (Corpus::const_iterator corpusIt=contexts_corpus.begin(); 
          corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) {
-     vector<int> unique_terms;
+      vector<int> unique_terms;
       for (Document::const_iterator docIt=corpusIt->begin();
            docIt != corpusIt->end(); ++docIt) {
         if (unique_terms.empty() || *docIt != unique_terms.back())
@@ -111,6 +111,7 @@ int main(int argc, char **argv)
           //insert_result.first++;
       }
       documents_out << contexts_corpus.key(document_id) << '\t';
+      documents_out << model.max(document_id) << " ||| ";
       for (std::vector<int>::const_iterator termIt=unique_terms.begin();
            termIt != unique_terms.end(); ++termIt) {
         if (termIt != unique_terms.begin())
-- 
cgit v1.2.3