diff options
author | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-10-02 00:19:43 -0400 |
commit | e26434979adc33bd949566ba7bf02dff64e80a3e (patch) | |
tree | d1c72495e3af6301bd28e7e66c42de0c7a944d1f /gi/pyp-topics/src/mpi-corpus.hh | |
parent | 0870d4a1f5e14cc7daf553b180d599f09f6614a2 (diff) |
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'gi/pyp-topics/src/mpi-corpus.hh')
-rw-r--r-- | gi/pyp-topics/src/mpi-corpus.hh | 69 |
1 files changed, 0 insertions, 69 deletions
```diff
diff --git a/gi/pyp-topics/src/mpi-corpus.hh b/gi/pyp-topics/src/mpi-corpus.hh
deleted file mode 100644
index f5c478a9..00000000
--- a/gi/pyp-topics/src/mpi-corpus.hh
+++ /dev/null
@@ -1,69 +0,0 @@
-#ifndef _MPI_CORPUS_HH
-#define _MPI_CORPUS_HH
-
-#include <vector>
-#include <string>
-#include <map>
-#include <tr1/unordered_map>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-#include <boost/mpi/environment.hpp>
-#include <boost/mpi/communicator.hpp>
-
-#include "contexts_corpus.hh"
-
-
-////////////////////////////////////////////////////////////////
-// MPICorpus
-////////////////////////////////////////////////////////////////
-
-class MPICorpus : public ContextsCorpus {
-public:
-  MPICorpus() : ContextsCorpus() {
-    boost::mpi::communicator world;
-    m_rank = world.rank();
-    m_size = world.size();
-    m_start = -1;
-    m_end = -1;
-  }
-  virtual ~MPICorpus() {}
-
-  virtual unsigned read_contexts(const std::string &filename,
-                                 BackoffGenerator* backoff_gen=0,
-                                 bool filter_singeltons=false,
-                                 bool binary_contexts=false) {
-    unsigned result = ContextsCorpus::read_contexts(filename, backoff_gen, filter_singeltons, binary_contexts);
-
-    if (m_rank == 0) std::cerr << "\tLoad balancing terms per mpi segment:" << std::endl;
-    float segment_size = num_terms() / m_size;
-    float term_threshold = segment_size;
-    int seen_terms = 0;
-    std::vector<int> end_points;
-    for (int i=0; i < num_documents(); ++i) {
-      seen_terms += m_documents.at(i).size();
-      if (seen_terms >= term_threshold) {
-        end_points.push_back(i+1);
-        term_threshold += segment_size;
-        if (m_rank == 0) std::cerr << "\t\t" << i+1 << ": " << seen_terms << " terms, " << 100*seen_terms / (float)num_terms() << "%" << std::endl;
-      }
-    }
-    m_start = (m_rank == 0 ? 0 : end_points.at(m_rank-1));
-    m_end = (m_rank == m_size-1 ? num_documents() : end_points.at(m_rank));
-
-    return result;
-  }
-
-  void
-  bounds(int* start, int* end) const {
-    *start = m_start;
-    *end = m_end;
-  }
-
-
-
-protected:
-  int m_rank, m_size;
-  int m_start, m_end;
-};
-
-#endif // _MPI_CORPUS_HH
```