diff options
Diffstat (limited to 'gi')
| -rw-r--r-- | gi/pyp-topics/src/pyp-topics.cc | 28 | ||||
| -rw-r--r-- | gi/pyp-topics/src/pyp-topics.hh | 3 | ||||
| -rw-r--r-- | gi/pyp-topics/src/train-contexts.cc | 3 | 
3 files changed, 31 insertions, 3 deletions
| diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc index 56d49928..186267d3 100644 --- a/gi/pyp-topics/src/pyp-topics.cc +++ b/gi/pyp-topics/src/pyp-topics.cc @@ -180,6 +180,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) {            << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";          if (k % 5 == 0) std::cerr << std::endl << '\t';        } +      std::cerr.precision(4);        std::cerr << std::endl;      }    } @@ -283,7 +284,32 @@ int PYPTopics::max_topic() const {    return current_topic;  } -int PYPTopics::max(const DocumentId& doc, const Term& term) { +int PYPTopics::max(const DocumentId& doc) const { +  //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl; +  // collect probs +  F current_max=0.0; +  int current_topic=-1; +  for (int k=0; k<m_num_topics; ++k) { +    //F p_w_k = prob(term, k); + +    F topic_prob = m_topic_p0; +    if (m_use_topic_pyp)  +      topic_prob = m_topic_pyp.prob(k, m_topic_p0); + +    F prob = 0; +    if (doc < 0) prob = topic_prob; +    else         prob = m_document_pyps[doc].prob(k, topic_prob); + +    if (prob > current_max) { +      current_max = prob; +      current_topic = k; +    } +  } +  assert(current_topic >= 0); +  return current_topic; +} + +int PYPTopics::max(const DocumentId& doc, const Term& term) const {    //std::cerr << "PYPTopics::max(" << doc << "," << term << ")" << std::endl;    // collect probs    F current_max=0.0; diff --git a/gi/pyp-topics/src/pyp-topics.hh b/gi/pyp-topics/src/pyp-topics.hh index 7e003228..c35645aa 100644 --- a/gi/pyp-topics/src/pyp-topics.hh +++ b/gi/pyp-topics/src/pyp-topics.hh @@ -21,7 +21,8 @@ public:    void sample(const Corpus& corpus, int samples);    int sample(const DocumentId& doc, const Term& term); -  int max(const DocumentId& doc, const Term& term); +  int max(const DocumentId& doc, const Term& term) const; +  int max(const DocumentId& doc) const;    int max_topic() const;    void set_backoff(const std::string& filename) { diff --git a/gi/pyp-topics/src/train-contexts.cc b/gi/pyp-topics/src/train-contexts.cc index 12e7baec..0a2f1959 100644 --- a/gi/pyp-topics/src/train-contexts.cc +++ b/gi/pyp-topics/src/train-contexts.cc @@ -99,7 +99,7 @@ int main(int argc, char **argv)     map<int,int> all_terms;      for (Corpus::const_iterator corpusIt=contexts_corpus.begin();            corpusIt != contexts_corpus.end(); ++corpusIt, ++document_id) { -     vector<int> unique_terms; +      vector<int> unique_terms;        for (Document::const_iterator docIt=corpusIt->begin();             docIt != corpusIt->end(); ++docIt) {          if (unique_terms.empty() || *docIt != unique_terms.back()) @@ -111,6 +111,7 @@ int main(int argc, char **argv)            //insert_result.first++;        }        documents_out << contexts_corpus.key(document_id) << '\t'; +      documents_out << model.max(document_id) << " ||| ";        for (std::vector<int>::const_iterator termIt=unique_terms.begin();             termIt != unique_terms.end(); ++termIt) {          if (termIt != unique_terms.begin()) | 
