diff options
Diffstat (limited to 'gi')
-rw-r--r-- | gi/pyp-topics/src/pyp-topics.cc | 40 | ||||
-rw-r--r-- | gi/pyp-topics/src/timing.h | 6 | ||||
-rw-r--r-- | gi/pyp-topics/src/workers.hh | 5 |
3 files changed, 29 insertions, 22 deletions
diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc index 48ccf507..76f95b2a 100644 --- a/gi/pyp-topics/src/pyp-topics.cc +++ b/gi/pyp-topics/src/pyp-topics.cc @@ -1,9 +1,9 @@ -#include "pyp-topics.hh" #include "timing.h" +#include "pyp-topics.hh" //#include <boost/date_time/posix_time/posix_time_types.hpp> -void PYPTopics::sample_corpus(const Corpus& corpus, int samples, - int freq_cutoff_start, int freq_cutoff_end, +void PYPTopics::sample_corpus(const Corpus& corpus, int samples, + int freq_cutoff_start, int freq_cutoff_end, int freq_cutoff_interval) { Timer timer; @@ -35,7 +35,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, << corpus.num_types() << std::endl; int frequency_cutoff = freq_cutoff_start; - std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl; + std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl; timer.Reset(); // Initialisation pass @@ -78,11 +78,11 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, // Sampling phase for (int curr_sample=0; curr_sample < samples; ++curr_sample) { - if (freq_cutoff_interval > 0 && curr_sample != 1 - && curr_sample % freq_cutoff_interval == 1 + if (freq_cutoff_interval > 0 && curr_sample != 1 + && curr_sample % freq_cutoff_interval == 1 && frequency_cutoff > freq_cutoff_end) { frequency_cutoff--; - std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl; + std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl; } std::cerr << "\n -- Sample " << curr_sample << " "; std::cerr.flush(); @@ -112,7 +112,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, docIt != docEnd; ++docIt, ++term_index) { Term term = *docIt; int freq = corpus.context_count(term); - if (freq < frequency_cutoff) + if (freq < frequency_cutoff) continue; // remove the prevous topic from the PYPs @@ -122,7 +122,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, decrement(term, current_topic); int table_delta = m_document_pyps[document_id].decrement(current_topic); - if (m_use_topic_pyp && table_delta < 0) + if (m_use_topic_pyp && table_delta < 0) m_topic_pyp.decrement(current_topic); } @@ -168,15 +168,15 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, JobReturnsF job = boost::bind(&PYPTopics::hresample_docs, this, max_threads, i); workers.push_back(new SimpleResampleWorker(job)); } - - WorkerPtrVect::iterator workerIt; + + WorkerPtrVect::iterator workerIt; for (workerIt = workers.begin(); workerIt != workers.end(); ++workerIt) - { + { //std::cerr << "Retrieving worker result.."; std::cerr.flush(); F wresult = workerIt->getResult(); //blocks until worker done - log_p += wresult; + log_p += wresult; //std::cerr << ".. got " << wresult << std::endl; std::cerr.flush(); - + } if (m_use_topic_pyp) { @@ -194,7 +194,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples, for (PYPs::iterator pypIt=m_word_pyps.front().begin(); pypIt != m_word_pyps.front().end(); ++pypIt, ++k) { if (k % 5 == 0) std::cerr << std::endl << '\t'; - std::cerr << "<" << k << ":" << pypIt->num_customers() << "," + std::cerr << "<" << k << ":" << pypIt->num_customers() << "," << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> "; } std::cerr.precision(4); @@ -210,9 +210,9 @@ PYPTopics::F PYPTopics::hresample_docs(int num_threads, int thread_id) F log_p = 0.0; PYPs::iterator pypIt = m_document_pyps.begin(); PYPs::iterator end = m_document_pyps.end(); - pypIt += thread_id; + pypIt += thread_id; // std::cerr << thread_id << " started " << std::endl; std::cerr.flush(); - + while (pypIt < end) { pypIt->resample_prior(); @@ -223,7 +223,7 @@ PYPTopics::F PYPTopics::hresample_docs(int num_threads, int thread_id) pypIt += num_threads; } // std::cerr << thread_id << " did " << resample_counter << " with answer " << log_p << std::endl; std::cerr.flush(); - + return log_p; } @@ -240,8 +240,8 @@ PYPTopics::F PYPTopics::hresample_docs(int num_threads, int thread_id) // } // } // //std::cerr << "topicworker has answer " << log_p << std::endl; std::cerr.flush(); -// -// return log_p; +// +// return log_p; //} void PYPTopics::decrement(const Term& term, int topic, int level) { diff --git a/gi/pyp-topics/src/timing.h b/gi/pyp-topics/src/timing.h index 7543295c..08360b0f 100644 --- a/gi/pyp-topics/src/timing.h +++ b/gi/pyp-topics/src/timing.h @@ -4,9 +4,15 @@ #ifdef __CYGWIN__ # ifndef _POSIX_MONOTONIC_CLOCK # define _POSIX_MONOTONIC_CLOCK +// this modifies <time.h> +# endif +// in case someone included <time.h> before we got here (this is lifted from time.h>) +# ifndef CLOCK_MONOTONIC +# define CLOCK_MONOTONIC (clockid_t)4 # endif #endif + #include <time.h> #include <sys/time.h> #include "clock_gettime_stub.c" diff --git a/gi/pyp-topics/src/workers.hh b/gi/pyp-topics/src/workers.hh index 1f496acf..55424c8d 100644 --- a/gi/pyp-topics/src/workers.hh +++ b/gi/pyp-topics/src/workers.hh @@ -1,6 +1,8 @@ #ifndef WORKERS_HH #define WORKERS_HH +#include "timing.h" + #include <iostream> #include <boost/bind.hpp> #include <boost/function.hpp> @@ -10,7 +12,6 @@ //#include <boost/date_time/posix_time/posix_time_types.hpp> -#include "timing.h" template <typename J, typename R> class SimpleWorker @@ -27,7 +28,7 @@ public: R run() //this is called upon thread creation { R wresult = 0; - + assert(job); timer.Reset(); wresult = job(); |