diff options
author | philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-29 04:28:03 +0000 |
---|---|---|
committer | philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-29 04:28:03 +0000 |
commit | 3418c41232756adb9bf29036980e55a3ce0140e0 (patch) | |
tree | 482a04c763884f9f5d6c58043e22cae2c3103976 /gi/pyp-topics/src/contexts_corpus.hh | |
parent | ad418214fe3b3fcd33d81225eb3d3fb08b67f88a (diff) |
Debugged hierarchical backoff model.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@48 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.hh')
-rw-r--r-- | gi/pyp-topics/src/contexts_corpus.hh | 35 |
1 file changed, 33 insertions, 2 deletions
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh index e680cef5..bd0cd34c 100644 --- a/gi/pyp-topics/src/contexts_corpus.hh +++ b/gi/pyp-topics/src/contexts_corpus.hh @@ -11,6 +11,36 @@ #include "contexts_lexer.h" #include "../../../decoder/dict.h" + +class BackoffGenerator { +public: + virtual ContextsLexer::Context + operator()(const ContextsLexer::Context& c) = 0; + +protected: + ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) { + if (c.size() <= 1) return ContextsLexer::Context(); + assert(c.size() % 2 == 1); + return ContextsLexer::Context(c.begin() + 1, c.end() - 1); + } +}; + +class NullBackoffGenerator : public BackoffGenerator { + virtual ContextsLexer::Context + operator()(const ContextsLexer::Context&) + { return ContextsLexer::Context(); } +}; + +class SimpleBackoffGenerator : public BackoffGenerator { + virtual ContextsLexer::Context + operator()(const ContextsLexer::Context& c) { + if (c.size() <= 3) + return ContextsLexer::Context(); + return strip_edges(c); + } +}; + + //////////////////////////////////////////////////////////////// // ContextsCorpus //////////////////////////////////////////////////////////////// @@ -22,10 +52,11 @@ public: typedef boost::ptr_vector<Document>::const_iterator const_iterator; public: - ContextsCorpus() {} + ContextsCorpus() : m_backoff(new TermBackoff) {} virtual ~ContextsCorpus() {} - unsigned read_contexts(const std::string &filename); + unsigned read_contexts(const std::string &filename, + BackoffGenerator* backoff_gen=0); TermBackoffPtr backoff_index() { return m_backoff; |