summaryrefslogtreecommitdiff
path: root/gi/pyp-topics/src/contexts_corpus.hh
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-10-02 00:19:43 -0400
committerChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-10-02 00:19:43 -0400
commit925087356b853e2099c1b60d8b757d7aa02121a9 (patch)
tree579925c5c9d3da51f43018a5c6d1c4dfbb72b089 /gi/pyp-topics/src/contexts_corpus.hh
parentea79e535d69f6854d01c62e3752971fb6730d8e7 (diff)
cdec cleanup, remove bayesian stuff, parsing stuff
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.hh')
-rw-r--r--gi/pyp-topics/src/contexts_corpus.hh90
1 files changed, 0 insertions, 90 deletions
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
deleted file mode 100644
index 2527f655..00000000
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ /dev/null
@@ -1,90 +0,0 @@
-#ifndef _CONTEXTS_CORPUS_HH
-#define _CONTEXTS_CORPUS_HH
-
-#include <vector>
-#include <string>
-#include <map>
-#include <tr1/unordered_map>
-
-#include <boost/ptr_container/ptr_vector.hpp>
-
-#include "corpus.hh"
-#include "contexts_lexer.h"
-#include "dict.h"
-
-
-class BackoffGenerator {
-public:
- virtual ContextsLexer::Context
- operator()(const ContextsLexer::Context& c) = 0;
-
-protected:
- ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) {
- if (c.size() <= 1) return ContextsLexer::Context();
- assert(c.size() % 2 == 1);
- return ContextsLexer::Context(c.begin() + 1, c.end() - 1);
- }
-};
-
-class NullBackoffGenerator : public BackoffGenerator {
- virtual ContextsLexer::Context
- operator()(const ContextsLexer::Context&)
- { return ContextsLexer::Context(); }
-};
-
-class SimpleBackoffGenerator : public BackoffGenerator {
- virtual ContextsLexer::Context
- operator()(const ContextsLexer::Context& c) {
- if (c.size() <= 3)
- return ContextsLexer::Context();
- return strip_edges(c);
- }
-};
-
-
-////////////////////////////////////////////////////////////////
-// ContextsCorpus
-////////////////////////////////////////////////////////////////
-
-class ContextsCorpus : public Corpus {
- friend void read_callback(const ContextsLexer::PhraseContextsType&, void*);
-
-public:
- ContextsCorpus() : m_backoff(new TermBackoff) {}
- virtual ~ContextsCorpus() {}
-
- virtual unsigned read_contexts(const std::string &filename,
- BackoffGenerator* backoff_gen=0,
- bool filter_singeltons=false,
- bool binary_contexts=false);
-
- TermBackoffPtr backoff_index() {
- return m_backoff;
- }
-
- std::vector<std::string> context2string(const WordID& id) const {
- std::vector<std::string> res;
- assert (id >= 0);
- m_dict.AsVector(id, &res);
- return res;
- }
-
- virtual int context_count(const WordID& id) const {
- return m_context_counts.find(id)->second;
- }
-
-
- const std::string& key(const int& i) const {
- return m_keys.at(i);
- }
-
- const Dict& dict() const { return m_dict; }
-
-protected:
- TermBackoffPtr m_backoff;
- Dict m_dict;
- std::vector<std::string> m_keys;
- std::tr1::unordered_map<int,int> m_context_counts;
-};
-
-#endif // _CONTEXTS_CORPUS_HH