From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/pyp-topics/src/mpi-corpus.hh | 69 ----------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 gi/pyp-topics/src/mpi-corpus.hh (limited to 'gi/pyp-topics/src/mpi-corpus.hh') diff --git a/gi/pyp-topics/src/mpi-corpus.hh b/gi/pyp-topics/src/mpi-corpus.hh deleted file mode 100644 index f5c478a9..00000000 --- a/gi/pyp-topics/src/mpi-corpus.hh +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _MPI_CORPUS_HH -#define _MPI_CORPUS_HH - -#include -#include -#include -#include - -#include -#include -#include - -#include "contexts_corpus.hh" - - -//////////////////////////////////////////////////////////////// -// MPICorpus -//////////////////////////////////////////////////////////////// - -class MPICorpus : public ContextsCorpus { -public: - MPICorpus() : ContextsCorpus() { - boost::mpi::communicator world; - m_rank = world.rank(); - m_size = world.size(); - m_start = -1; - m_end = -1; - } - virtual ~MPICorpus() {} - - virtual unsigned read_contexts(const std::string &filename, - BackoffGenerator* backoff_gen=0, - bool filter_singeltons=false, - bool binary_contexts=false) { - unsigned result = ContextsCorpus::read_contexts(filename, backoff_gen, filter_singeltons, binary_contexts); - - if (m_rank == 0) std::cerr << "\tLoad balancing terms per mpi segment:" << std::endl; - float segment_size = num_terms() / m_size; - float term_threshold = segment_size; - int seen_terms = 0; - std::vector end_points; - for (int i=0; i < num_documents(); ++i) { - seen_terms += m_documents.at(i).size(); - if (seen_terms >= term_threshold) { - end_points.push_back(i+1); - term_threshold += segment_size; - if (m_rank == 0) std::cerr << "\t\t" << i+1 << ": " << seen_terms << " terms, " << 100*seen_terms / (float)num_terms() << "%" << std::endl; - } - } - m_start = (m_rank == 0 ? 0 : end_points.at(m_rank-1)); - m_end = (m_rank == m_size-1 ? num_documents() : end_points.at(m_rank)); - - return result; - } - - void - bounds(int* start, int* end) const { - *start = m_start; - *end = m_end; - } - - - -protected: - int m_rank, m_size; - int m_start, m_end; -}; - -#endif // _MPI_CORPUS_HH -- cgit v1.2.3