diff options
Diffstat (limited to 'gi/pyp-topics/src')
| -rw-r--r-- | gi/pyp-topics/src/pyp-topics.cc | 40 | ||||
| -rw-r--r-- | gi/pyp-topics/src/timing.h | 6 | ||||
| -rw-r--r-- | gi/pyp-topics/src/workers.hh | 5 | 
3 files changed, 29 insertions, 22 deletions
diff --git a/gi/pyp-topics/src/pyp-topics.cc b/gi/pyp-topics/src/pyp-topics.cc index 48ccf507..76f95b2a 100644 --- a/gi/pyp-topics/src/pyp-topics.cc +++ b/gi/pyp-topics/src/pyp-topics.cc @@ -1,9 +1,9 @@ -#include "pyp-topics.hh"  #include "timing.h" +#include "pyp-topics.hh"  //#include <boost/date_time/posix_time/posix_time_types.hpp> -void PYPTopics::sample_corpus(const Corpus& corpus, int samples,  -                              int freq_cutoff_start, int freq_cutoff_end,  +void PYPTopics::sample_corpus(const Corpus& corpus, int samples, +                              int freq_cutoff_start, int freq_cutoff_end,                                int freq_cutoff_interval) {    Timer timer; @@ -35,7 +35,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,      << corpus.num_types() << std::endl;    int frequency_cutoff = freq_cutoff_start; -  std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl;  +  std::cerr << " Context frequency cutoff set to " << frequency_cutoff << std::endl;    timer.Reset();    // Initialisation pass @@ -78,11 +78,11 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,    // Sampling phase    for (int curr_sample=0; curr_sample < samples; ++curr_sample) { -    if (freq_cutoff_interval > 0 && curr_sample != 1  -        && curr_sample % freq_cutoff_interval == 1  +    if (freq_cutoff_interval > 0 && curr_sample != 1 +        && curr_sample % freq_cutoff_interval == 1          && frequency_cutoff > freq_cutoff_end) {        frequency_cutoff--; -      std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl;  +      std::cerr << "\n Context frequency cutoff set to " << frequency_cutoff << std::endl;      }      std::cerr << "\n  -- Sample " << curr_sample << " "; std::cerr.flush(); @@ -112,7 +112,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,             docIt != docEnd; ++docIt, ++term_index) {          Term term = *docIt;          int freq = corpus.context_count(term); -        if (freq < frequency_cutoff)  +        if (freq < frequency_cutoff)            continue;          // remove the prevous topic from the PYPs @@ -122,7 +122,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,            decrement(term, current_topic);            int table_delta = m_document_pyps[document_id].decrement(current_topic); -          if (m_use_topic_pyp && table_delta < 0)  +          if (m_use_topic_pyp && table_delta < 0)              m_topic_pyp.decrement(current_topic);          } @@ -168,15 +168,15 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,          JobReturnsF job = boost::bind(&PYPTopics::hresample_docs, this, max_threads, i);          workers.push_back(new SimpleResampleWorker(job));        } -       -      WorkerPtrVect::iterator workerIt;  + +      WorkerPtrVect::iterator workerIt;        for (workerIt = workers.begin(); workerIt != workers.end(); ++workerIt) -      {  +      {          //std::cerr << "Retrieving worker result.."; std::cerr.flush();          F wresult = workerIt->getResult(); //blocks until worker done -        log_p += wresult;  +        log_p += wresult;          //std::cerr << ".. got " << wresult << std::endl; std::cerr.flush(); -         +        }        if (m_use_topic_pyp) { @@ -194,7 +194,7 @@ void PYPTopics::sample_corpus(const Corpus& corpus, int samples,        for (PYPs::iterator pypIt=m_word_pyps.front().begin();             pypIt != m_word_pyps.front().end(); ++pypIt, ++k) {          if (k % 5 == 0) std::cerr << std::endl << '\t'; -        std::cerr << "<" << k << ":" << pypIt->num_customers() << ","  +        std::cerr << "<" << k << ":" << pypIt->num_customers() << ","            << pypIt->num_types() << "," << m_topic_pyp.prob(k, m_topic_p0) << "> ";        }        std::cerr.precision(4); @@ -210,9 +210,9 @@ PYPTopics::F PYPTopics::hresample_docs(int num_threads, int thread_id)    F log_p = 0.0;    PYPs::iterator pypIt = m_document_pyps.begin();    PYPs::iterator end = m_document_pyps.end(); -  pypIt += thread_id;   +  pypIt += thread_id;  //  std::cerr << thread_id << " started " << std::endl; std::cerr.flush(); -   +    while (pypIt < end)    {      pypIt->resample_prior(); @@ -223,7 +223,7 @@ PYPTopics::F PYPTopics::hresample_docs(int num_threads, int thread_id)      pypIt += num_threads;    }  //  std::cerr << thread_id << " did " << resample_counter << " with answer " << log_p << std::endl; std::cerr.flush(); -   +    return log_p;  } @@ -240,8 +240,8 @@ PYPTopics::F PYPTopics::hresample_docs(int num_threads, int thread_id)  //    }  //  }  //  //std::cerr << "topicworker has answer " << log_p << std::endl; std::cerr.flush(); -//   -// return log_p;  +// +// return log_p;  //}  void PYPTopics::decrement(const Term& term, int topic, int level) { diff --git a/gi/pyp-topics/src/timing.h b/gi/pyp-topics/src/timing.h index 7543295c..08360b0f 100644 --- a/gi/pyp-topics/src/timing.h +++ b/gi/pyp-topics/src/timing.h @@ -4,9 +4,15 @@  #ifdef __CYGWIN__  # ifndef _POSIX_MONOTONIC_CLOCK  #  define _POSIX_MONOTONIC_CLOCK +// this modifies <time.h> +# endif +// in case someone included <time.h> before we got here (this is lifted from time.h>) +# ifndef CLOCK_MONOTONIC +#  define CLOCK_MONOTONIC (clockid_t)4  # endif  #endif +  #include <time.h>  #include <sys/time.h>  #include "clock_gettime_stub.c" diff --git a/gi/pyp-topics/src/workers.hh b/gi/pyp-topics/src/workers.hh index 1f496acf..55424c8d 100644 --- a/gi/pyp-topics/src/workers.hh +++ b/gi/pyp-topics/src/workers.hh @@ -1,6 +1,8 @@  #ifndef WORKERS_HH  #define WORKERS_HH +#include "timing.h" +  #include <iostream>  #include <boost/bind.hpp>  #include <boost/function.hpp> @@ -10,7 +12,6 @@  //#include <boost/date_time/posix_time/posix_time_types.hpp> -#include "timing.h"  template <typename J, typename R>  class SimpleWorker @@ -27,7 +28,7 @@ public:      R run() //this is called upon thread creation      {          R wresult = 0; -     +          assert(job);          timer.Reset();          wresult = job();  | 
