diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 |
commit | 1db70a45d59946560fbd5db6487b55a8674ef973 (patch) | |
tree | 172585dafe4d1462f22d8200e733d52dddb55b1e /gi/pyp-topics/src/mpi-corpus.hh | |
parent | 4dd5216d3afa9ab72b150e250a3c30a5f223ce53 (diff) | |
parent | 6bbf03ac46bd57400aa9e65a321a304a234af935 (diff) |
merge upstream/master
Diffstat (limited to 'gi/pyp-topics/src/mpi-corpus.hh')
-rw-r--r-- | gi/pyp-topics/src/mpi-corpus.hh | 69 |
1 files changed, 0 insertions, 69 deletions
diff --git a/gi/pyp-topics/src/mpi-corpus.hh b/gi/pyp-topics/src/mpi-corpus.hh deleted file mode 100644 index f5c478a9..00000000 --- a/gi/pyp-topics/src/mpi-corpus.hh +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _MPI_CORPUS_HH -#define _MPI_CORPUS_HH - -#include <vector> -#include <string> -#include <map> -#include <tr1/unordered_map> - -#include <boost/ptr_container/ptr_vector.hpp> -#include <boost/mpi/environment.hpp> -#include <boost/mpi/communicator.hpp> - -#include "contexts_corpus.hh" - - -//////////////////////////////////////////////////////////////// -// MPICorpus -//////////////////////////////////////////////////////////////// - -class MPICorpus : public ContextsCorpus { -public: - MPICorpus() : ContextsCorpus() { - boost::mpi::communicator world; - m_rank = world.rank(); - m_size = world.size(); - m_start = -1; - m_end = -1; - } - virtual ~MPICorpus() {} - - virtual unsigned read_contexts(const std::string &filename, - BackoffGenerator* backoff_gen=0, - bool filter_singeltons=false, - bool binary_contexts=false) { - unsigned result = ContextsCorpus::read_contexts(filename, backoff_gen, filter_singeltons, binary_contexts); - - if (m_rank == 0) std::cerr << "\tLoad balancing terms per mpi segment:" << std::endl; - float segment_size = num_terms() / m_size; - float term_threshold = segment_size; - int seen_terms = 0; - std::vector<int> end_points; - for (int i=0; i < num_documents(); ++i) { - seen_terms += m_documents.at(i).size(); - if (seen_terms >= term_threshold) { - end_points.push_back(i+1); - term_threshold += segment_size; - if (m_rank == 0) std::cerr << "\t\t" << i+1 << ": " << seen_terms << " terms, " << 100*seen_terms / (float)num_terms() << "%" << std::endl; - } - } - m_start = (m_rank == 0 ? 0 : end_points.at(m_rank-1)); - m_end = (m_rank == m_size-1 ? num_documents() : end_points.at(m_rank)); - - return result; - } - - void - bounds(int* start, int* end) const { - *start = m_start; - *end = m_end; - } - - - -protected: - int m_rank, m_size; - int m_start, m_end; -}; - -#endif // _MPI_CORPUS_HH |