From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/pyp-topics/src/mpi-pyp-topics.hh | 106 ------------------------------------ 1 file changed, 106 deletions(-) delete mode 100644 gi/pyp-topics/src/mpi-pyp-topics.hh (limited to 'gi/pyp-topics/src/mpi-pyp-topics.hh') diff --git a/gi/pyp-topics/src/mpi-pyp-topics.hh b/gi/pyp-topics/src/mpi-pyp-topics.hh deleted file mode 100644 index d96bc4e5..00000000 --- a/gi/pyp-topics/src/mpi-pyp-topics.hh +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef MPI_PYP_TOPICS_HH -#define MPI_PYP_TOPICS_HH - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "mpi-pyp.hh" -#include "mpi-corpus.hh" - -class MPIPYPTopics { -public: - typedef std::vector DocumentTopics; - typedef std::vector CorpusTopics; - typedef double F; - -public: - MPIPYPTopics(int num_topics, bool use_topic_pyp=false, unsigned long seed = 0) - : m_num_topics(num_topics), m_word_pyps(1), - m_topic_pyp(0.5,1.0), m_use_topic_pyp(use_topic_pyp), - m_seed(seed), - uni_dist(0,1), rng(seed == 0 ? (unsigned long)this : seed), - rnd(rng, uni_dist), m_mpi_start(-1), m_mpi_end(-1) { - boost::mpi::communicator m_world; - m_rank = m_world.rank(); - m_size = m_world.size(); - m_am_root = (m_rank == 0); - } - - void sample_corpus(const MPICorpus& corpus, int samples, - int freq_cutoff_start=0, int freq_cutoff_end=0, - int freq_cutoff_interval=0, - int max_contexts_per_document=0); - - int sample(const DocumentId& doc, const Term& term); - std::pair max(const DocumentId& doc, const Term& term) const; - std::pair max(const DocumentId& doc) const; - int max_topic() const; - - void set_backoff(const std::string& filename) { - m_backoff.reset(new TermBackoff); - m_backoff->read(filename); - m_word_pyps.clear(); - m_word_pyps.resize(m_backoff->order(), MPIPYPs()); - } - void set_backoff(TermBackoffPtr backoff) { - m_backoff = backoff; - m_word_pyps.clear(); - m_word_pyps.resize(m_backoff->order(), MPIPYPs()); - } - - F prob(const Term& term, int topic, int level=0) const; - void decrement(const Term& term, int topic, int level=0); - void increment(const Term& term, int topic, int level=0); - - std::ostream& print_document_topics(std::ostream& out) const; - std::ostream& print_topic_terms(std::ostream& out) const; - - void synchronise(); - -private: - F word_pyps_p0(const Term& term, int topic, int level) const; - - int m_num_topics; - F m_term_p0, m_topic_p0, m_backoff_p0; - - CorpusTopics m_corpus_topics; - typedef boost::ptr_vector< PYP > PYPs; - typedef boost::ptr_vector< MPIPYP > MPIPYPs; - PYPs m_document_pyps; - std::vector m_word_pyps; - MPIPYP m_topic_pyp; - bool m_use_topic_pyp; - - unsigned long m_seed; - - //typedef boost::mt19937 base_generator_type; - //typedef boost::hellekalek1995 base_generator_type; - typedef boost::lagged_fibonacci607 base_generator_type; - typedef boost::uniform_real<> uni_dist_type; - typedef boost::variate_generator gen_type; - - uni_dist_type uni_dist; - base_generator_type rng; //this gets the seed - gen_type rnd; //instantiate: rnd(rng, uni_dist) - //call: rnd() generates uniform on [0,1) - - TermBackoffPtr m_backoff; - - boost::mpi::communicator m_world; - bool m_am_root; - int m_rank, m_size; - int m_mpi_start, m_mpi_end; -}; - -#endif // PYP_TOPICS_HH -- cgit v1.2.3