diff options
Diffstat (limited to 'gi/pyp-topics/src/pyp-topics.cc')
-rw-r--r-- | gi/pyp-topics/src/pyp-topics.cc | 38 |
1 files changed, 22 insertions, 16 deletions
diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc index 186267d3..4fb75caa 100644 --- a/gi/pyp-topics/src/pyp-topics.cc +++ b/gi/pyp-topics/src/pyp-topics.cc @@ -1,21 +1,27 @@ +#ifdef __CYGWIN__ +# ifndef _POSIX_MONOTONIC_CLOCK +# define _POSIX_MONOTONIC_CLOCK +# endif +#endif + #include "pyp-topics.hh" //#include "mt19937ar.h" #include <boost/date_time/posix_time/posix_time_types.hpp> +#include <time.h> #include <sys/time.h> #include "clock_gettime_stub.c" - struct Timer { Timer() { Reset(); } - void Reset() - { - clock_gettime(CLOCK_MONOTONIC, &start_t); + void Reset() + { + clock_gettime(CLOCK_MONOTONIC, &start_t); } double Elapsed() const { timespec end_t; - clock_gettime(CLOCK_MONOTONIC, &end_t); - const double elapsed = (end_t.tv_sec - start_t.tv_sec) + clock_gettime(CLOCK_MONOTONIC, &end_t); + const double elapsed = (end_t.tv_sec - start_t.tv_sec) + (end_t.tv_nsec - start_t.tv_nsec) / 1000000000.0; return elapsed; } @@ -31,7 +37,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) { m_word_pyps.push_back(PYPs()); } - std::cerr << " Training with " << m_word_pyps.size()-1 << " backoff level" + std::cerr << " Training with " << m_word_pyps.size()-1 << " backoff level" << (m_word_pyps.size()==2 ? ":" : "s:") << std::endl; for (int i=0; i<(int)m_word_pyps.size(); ++i) @@ -44,13 +50,13 @@ void PYPTopics::sample(const Corpus& corpus, int samples) { m_term_p0 = 1.0/corpus.num_types(); m_backoff_p0 = 1.0/corpus.num_documents(); - std::cerr << " Documents: " << corpus.num_documents() << " Terms: " + std::cerr << " Documents: " << corpus.num_documents() << " Terms: " << corpus.num_types() << std::endl; timer.Reset(); // Initialisation pass int document_id=0, topic_counter=0; - for (Corpus::const_iterator corpusIt=corpus.begin(); + for (Corpus::const_iterator corpusIt=corpus.begin(); corpusIt != corpus.end(); ++corpusIt, ++document_id) { m_corpus_topics.push_back(DocumentTopics(corpusIt->size(), 0)); @@ -115,7 +121,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) { decrement(term, current_topic); int table_delta = m_document_pyps[document_id].decrement(current_topic); - if (m_use_topic_pyp && table_delta < 0) + if (m_use_topic_pyp && table_delta < 0) m_topic_pyp.decrement(current_topic); // sample a new_topic @@ -176,7 +182,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) { std::cerr.precision(2); for (PYPs::iterator pypIt=m_word_pyps.front().begin(); pypIt != m_word_pyps.front().end(); ++pypIt, ++k) { - std::cerr << "<" << k << ":" << pypIt->num_customers() << "," + std::cerr << "<" << k << ":" << pypIt->num_customers() << "," << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> "; if (k % 5 == 0) std::cerr << std::endl << '\t'; } @@ -227,7 +233,7 @@ int PYPTopics::sample(const DocumentId& doc, const Term& term) { // Second pass: sample a topic F cutoff = mt_genrand_res53() * sum; for (int k=0; k<m_num_topics; ++k) { - if (cutoff <= sums[k]) + if (cutoff <= sums[k]) return k; } assert(false); @@ -293,7 +299,7 @@ int PYPTopics::max(const DocumentId& doc) const { //F p_w_k = prob(term, k); F topic_prob = m_topic_p0; - if (m_use_topic_pyp) + if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0); F prob = 0; @@ -318,7 +324,7 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) const { F p_w_k = prob(term, k); F topic_prob = m_topic_p0; - if (m_use_topic_pyp) + if (m_use_topic_pyp) topic_prob = m_topic_pyp.prob(k, m_topic_p0); F p_k_d = 0; @@ -336,7 +342,7 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) const { } std::ostream& PYPTopics::print_document_topics(std::ostream& out) const { - for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin(); + for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin(); corpusIt != m_corpus_topics.end(); ++corpusIt) { int term_index=0; for (DocumentTopics::const_iterator docIt=corpusIt->begin(); @@ -350,7 +356,7 @@ std::ostream& PYPTopics::print_document_topics(std::ostream& out) const { } std::ostream& PYPTopics::print_topic_terms(std::ostream& out) const { - for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin(); + for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin(); pypsIt != m_word_pyps.front().end(); ++pypsIt) { int term_index=0; for (PYP<int>::const_iterator termIt=pypsIt->begin(); |