summary | refs | log | tree | commit | diff
path: root/gi/pyp-topics/src
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pyp-topics/src')
-rw-r--r-- gi/pyp-topics/src/pyp-topics.cc 38
1 files changed, 22 insertions, 16 deletions
diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc
index 186267d3..4fb75caa 100644
--- a/gi/pyp-topics/src/pyp-topics.cc
+++ b/gi/pyp-topics/src/pyp-topics.cc
@@ -1,21 +1,27 @@
+#ifdef __CYGWIN__
+# ifndef _POSIX_MONOTONIC_CLOCK
+# define _POSIX_MONOTONIC_CLOCK
+# endif
+#endif
+
#include "pyp-topics.hh"
//#include "mt19937ar.h"
#include <boost/date_time/posix_time/posix_time_types.hpp>
+#include <time.h>
#include <sys/time.h>
#include "clock_gettime_stub.c"
-
struct Timer {
Timer() { Reset(); }
- void Reset()
- {
- clock_gettime(CLOCK_MONOTONIC, &start_t);
+ void Reset()
+ {
+ clock_gettime(CLOCK_MONOTONIC, &start_t);
}
double Elapsed() const {
timespec end_t;
- clock_gettime(CLOCK_MONOTONIC, &end_t);
- const double elapsed = (end_t.tv_sec - start_t.tv_sec)
+ clock_gettime(CLOCK_MONOTONIC, &end_t);
+ const double elapsed = (end_t.tv_sec - start_t.tv_sec)
+ (end_t.tv_nsec - start_t.tv_nsec) / 1000000000.0;
return elapsed;
}
@@ -31,7 +37,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) {
m_word_pyps.push_back(PYPs());
}
- std::cerr << " Training with " << m_word_pyps.size()-1 << " backoff level"
+ std::cerr << " Training with " << m_word_pyps.size()-1 << " backoff level"
<< (m_word_pyps.size()==2 ? ":" : "s:") << std::endl;
for (int i=0; i<(int)m_word_pyps.size(); ++i)
@@ -44,13 +50,13 @@ void PYPTopics::sample(const Corpus& corpus, int samples) {
m_term_p0 = 1.0/corpus.num_types();
m_backoff_p0 = 1.0/corpus.num_documents();
- std::cerr << " Documents: " << corpus.num_documents() << " Terms: "
+ std::cerr << " Documents: " << corpus.num_documents() << " Terms: "
<< corpus.num_types() << std::endl;
timer.Reset();
// Initialisation pass
int document_id=0, topic_counter=0;
- for (Corpus::const_iterator corpusIt=corpus.begin();
+ for (Corpus::const_iterator corpusIt=corpus.begin();
corpusIt != corpus.end(); ++corpusIt, ++document_id) {
m_corpus_topics.push_back(DocumentTopics(corpusIt->size(), 0));
@@ -115,7 +121,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) {
decrement(term, current_topic);
int table_delta = m_document_pyps[document_id].decrement(current_topic);
- if (m_use_topic_pyp && table_delta < 0)
+ if (m_use_topic_pyp && table_delta < 0)
m_topic_pyp.decrement(current_topic);
// sample a new_topic
@@ -176,7 +182,7 @@ void PYPTopics::sample(const Corpus& corpus, int samples) {
std::cerr.precision(2);
for (PYPs::iterator pypIt=m_word_pyps.front().begin();
pypIt != m_word_pyps.front().end(); ++pypIt, ++k) {
- std::cerr << "<" << k << ":" << pypIt->num_customers() << ","
+ std::cerr << "<" << k << ":" << pypIt->num_customers() << ","
<< pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";
if (k % 5 == 0) std::cerr << std::endl << '\t';
}
@@ -227,7 +233,7 @@ int PYPTopics::sample(const DocumentId& doc, const Term& term) {
// Second pass: sample a topic
F cutoff = mt_genrand_res53() * sum;
for (int k=0; k<m_num_topics; ++k) {
- if (cutoff <= sums[k])
+ if (cutoff <= sums[k])
return k;
}
assert(false);
@@ -293,7 +299,7 @@ int PYPTopics::max(const DocumentId& doc) const {
//F p_w_k = prob(term, k);
F topic_prob = m_topic_p0;
- if (m_use_topic_pyp)
+ if (m_use_topic_pyp)
topic_prob = m_topic_pyp.prob(k, m_topic_p0);
F prob = 0;
@@ -318,7 +324,7 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) const {
F p_w_k = prob(term, k);
F topic_prob = m_topic_p0;
- if (m_use_topic_pyp)
+ if (m_use_topic_pyp)
topic_prob = m_topic_pyp.prob(k, m_topic_p0);
F p_k_d = 0;
@@ -336,7 +342,7 @@ int PYPTopics::max(const DocumentId& doc, const Term& term) const {
}
std::ostream& PYPTopics::print_document_topics(std::ostream& out) const {
- for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin();
+ for (CorpusTopics::const_iterator corpusIt=m_corpus_topics.begin();
corpusIt != m_corpus_topics.end(); ++corpusIt) {
int term_index=0;
for (DocumentTopics::const_iterator docIt=corpusIt->begin();
@@ -350,7 +356,7 @@ std::ostream& PYPTopics::print_document_topics(std::ostream& out) const {
}
std::ostream& PYPTopics::print_topic_terms(std::ostream& out) const {
- for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin();
+ for (PYPs::const_iterator pypsIt=m_word_pyps.front().begin();
pypsIt != m_word_pyps.front().end(); ++pypsIt) {
int term_index=0;
for (PYP<int>::const_iterator termIt=pypsIt->begin();