From e26434979adc33bd949566ba7bf02dff64e80a3e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 2 Oct 2012 00:19:43 -0400 Subject: cdec cleanup, remove bayesian stuff, parsing stuff --- gi/pyp-topics/src/contexts_corpus.hh | 90 ------------------------------------ 1 file changed, 90 deletions(-) delete mode 100644 gi/pyp-topics/src/contexts_corpus.hh (limited to 'gi/pyp-topics/src/contexts_corpus.hh') diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh deleted file mode 100644 index 2527f655..00000000 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ /dev/null @@ -1,90 +0,0 @@ -#ifndef _CONTEXTS_CORPUS_HH -#define _CONTEXTS_CORPUS_HH - -#include -#include -#include -#include - -#include - -#include "corpus.hh" -#include "contexts_lexer.h" -#include "dict.h" - - -class BackoffGenerator { -public: - virtual ContextsLexer::Context - operator()(const ContextsLexer::Context& c) = 0; - -protected: - ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) { - if (c.size() <= 1) return ContextsLexer::Context(); - assert(c.size() % 2 == 1); - return ContextsLexer::Context(c.begin() + 1, c.end() - 1); - } -}; - -class NullBackoffGenerator : public BackoffGenerator { - virtual ContextsLexer::Context - operator()(const ContextsLexer::Context&) - { return ContextsLexer::Context(); } -}; - -class SimpleBackoffGenerator : public BackoffGenerator { - virtual ContextsLexer::Context - operator()(const ContextsLexer::Context& c) { - if (c.size() <= 3) - return ContextsLexer::Context(); - return strip_edges(c); - } -}; - - -//////////////////////////////////////////////////////////////// -// ContextsCorpus -//////////////////////////////////////////////////////////////// - -class ContextsCorpus : public Corpus { - friend void read_callback(const ContextsLexer::PhraseContextsType&, void*); - -public: - ContextsCorpus() : m_backoff(new TermBackoff) {} - virtual ~ContextsCorpus() {} - - virtual unsigned read_contexts(const std::string &filename, - BackoffGenerator* backoff_gen=0, - bool filter_singeltons=false, - bool binary_contexts=false); - - TermBackoffPtr backoff_index() { - return m_backoff; - } - - std::vector context2string(const WordID& id) const { - std::vector res; - assert (id >= 0); - m_dict.AsVector(id, &res); - return res; - } - - virtual int context_count(const WordID& id) const { - return m_context_counts.find(id)->second; - } - - - const std::string& key(const int& i) const { - return m_keys.at(i); - } - - const Dict& dict() const { return m_dict; } - -protected: - TermBackoffPtr m_backoff; - Dict m_dict; - std::vector m_keys; - std::tr1::unordered_map m_context_counts; -}; - -#endif // _CONTEXTS_CORPUS_HH -- cgit v1.2.3