summaryrefslogtreecommitdiff
path: root/gi/pyp-topics/src/contexts_corpus.hh
diff options
context:
space:
mode:
author philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-29 04:28:03 +0000
committer philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-29 04:28:03 +0000
commit 3418c41232756adb9bf29036980e55a3ce0140e0 (patch)
tree 482a04c763884f9f5d6c58043e22cae2c3103976 /gi/pyp-topics/src/contexts_corpus.hh
parent ad418214fe3b3fcd33d81225eb3d3fb08b67f88a (diff)
Debugged hierarchical backoff model.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@48 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.hh')
-rw-r--r-- gi/pyp-topics/src/contexts_corpus.hh 35
1 files changed, 33 insertions, 2 deletions
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
index e680cef5..bd0cd34c 100644
--- a/gi/pyp-topics/src/contexts_corpus.hh
+++ b/gi/pyp-topics/src/contexts_corpus.hh
@@ -11,6 +11,36 @@
#include "contexts_lexer.h"
#include "../../../decoder/dict.h"
+
+class BackoffGenerator {  // Abstract functor interface: maps one ContextsLexer::Context to another Context (the "backoff" of the input).
+public:  // NOTE(review): no virtual destructor is declared here; avoid deleting a subclass through a BackoffGenerator* until that is confirmed safe.
+ virtual ContextsLexer::Context  // The backed-off Context is returned by value.
+ operator()(const ContextsLexer::Context& c) = 0;  // Pure virtual: every subclass supplies its own mapping for the input context c.
+
+protected:
+ ContextsLexer::Context strip_edges(const ContextsLexer::Context& c) {  // Helper for subclasses: builds a copy of c without its first and last element.
+ if (c.size() <= 1) return ContextsLexer::Context();  // With 0 or 1 elements nothing would remain, so return a default-constructed Context.
+ assert(c.size() % 2 == 1);  // Checks (only when asserts are enabled) that the length is odd; presumably contexts are symmetric around a centre slot -- TODO confirm.
+ return ContextsLexer::Context(c.begin() + 1, c.end() - 1);  // Range-construct from the interior [begin + 1, end - 1).
+ }
+};
+
+class NullBackoffGenerator : public BackoffGenerator {  // Backoff generator that ignores its input and always yields a default-constructed Context.
+ virtual ContextsLexer::Context  // NOTE(review): no access specifier precedes this override, so it is private in this class; it is still reachable via a BackoffGenerator reference or pointer.
+ operator()(const ContextsLexer::Context&)  // The parameter is left unnamed because the input context is not used.
+ { return ContextsLexer::Context(); }  // Default-constructed (presumably empty) Context.
+};
+
+class SimpleBackoffGenerator : public BackoffGenerator {  // Backoff generator that shortens a context by dropping its first and last element.
+ virtual ContextsLexer::Context  // NOTE(review): no access specifier precedes this override, so it is private in this class; it is still reachable via a BackoffGenerator reference or pointer.
+ operator()(const ContextsLexer::Context& c) {  // c is the context to back off from; it is not modified.
+ if (c.size() <= 3)  // Contexts of three or fewer elements are not shortened any further:
+ return ContextsLexer::Context();  // they map to a default-constructed Context instead.
+ return strip_edges(c);  // Otherwise return a copy of c without its two outermost elements.
+ }
+};
+
+
////////////////////////////////////////////////////////////////
// ContextsCorpus
////////////////////////////////////////////////////////////////
@@ -22,10 +52,11 @@ public:
typedef boost::ptr_vector<Document>::const_iterator const_iterator;
public:
- ContextsCorpus() {}
+ ContextsCorpus() : m_backoff(new TermBackoff) {}
virtual ~ContextsCorpus() {}
- unsigned read_contexts(const std::string &filename);
+ unsigned read_contexts(const std::string &filename,
+ BackoffGenerator* backoff_gen=0);
TermBackoffPtr backoff_index() {
return m_backoff;