diff options
author | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
---|---|---|
committer | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
commit | 3d8d656fa7911524e0e6885647173474524e0784 (patch) | |
tree | 81b1ee2fcb67980376d03f0aa48e42e53abff222 /gi/pyp-topics/src/contexts_corpus.hh | |
parent | be7f57fdd484e063775d7abf083b9fa4c403b610 (diff) | |
parent | 96fedabebafe7a38a6d5928be8fff767e411d705 (diff) |
fixed conflicts
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.hh')
-rw-r--r-- | gi/pyp-topics/src/contexts_corpus.hh | 90 |
1 files changed, 0 insertions, 90 deletions
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh deleted file mode 100644 index 2527f655..00000000 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _CONTEXTS_CORPUS_HH -#define _CONTEXTS_CORPUS_HH - -#include <vector> -#include <string> -#include <map> -#include <tr1/unordered_map> - -#include <boost/ptr_container/ptr_vector.hpp> - -#include "corpus.hh" -#include "contexts_lexer.h" -#include "dict.h" - - -class BackoffGenerator { -public: - virtual ContextsLexer::Context - operator()(const ContextsLexer::Context& c) = 0; - -protected: - ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) { - if (c.size() <= 1) return ContextsLexer::Context(); - assert(c.size() % 2 == 1); - return ContextsLexer::Context(c.begin() + 1, c.end() - 1); - } -}; - -class NullBackoffGenerator : public BackoffGenerator { - virtual ContextsLexer::Context - operator()(const ContextsLexer::Context&) - { return ContextsLexer::Context(); } -}; - -class SimpleBackoffGenerator : public BackoffGenerator { - virtual ContextsLexer::Context - operator()(const ContextsLexer::Context& c) { - if (c.size() <= 3) - return ContextsLexer::Context(); - return strip_edges(c); - } -}; - - -//////////////////////////////////////////////////////////////// -// ContextsCorpus -//////////////////////////////////////////////////////////////// - -class ContextsCorpus : public Corpus { - friend void read_callback(const ContextsLexer::PhraseContextsType&, void*); - -public: - ContextsCorpus() : m_backoff(new TermBackoff) {} - virtual ~ContextsCorpus() {} - - virtual unsigned read_contexts(const std::string &filename, - BackoffGenerator* backoff_gen=0, - bool filter_singeltons=false, - bool binary_contexts=false); - - TermBackoffPtr backoff_index() { - return m_backoff; - } - - std::vector<std::string> context2string(const WordID& id) const { - std::vector<std::string> res; - assert (id >= 0); - m_dict.AsVector(id, &res); - return res; - } - - virtual int context_count(const WordID& id) const { - return m_context_counts.find(id)->second; - } - - - const std::string& key(const int& i) const { - return m_keys.at(i); - } - - const Dict& dict() const { return m_dict; } - -protected: - TermBackoffPtr m_backoff; - Dict m_dict; - std::vector<std::string> m_keys; - std::tr1::unordered_map<int,int> m_context_counts; -}; - -#endif // _CONTEXTS_CORPUS_HH |