summary | refs | log | tree | commit | diff
path: root/gi/pyp-topics/src/contexts_corpus.hh
diff options
context:
space:
mode:
author philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-28 15:01:17 +0000
committer philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-28 15:01:17 +0000
commit 1d089b02eff4fa8837faecf99021f624d8845e5d (patch)
tree b6e3d20094514749c37485e154117871cdc8696f /gi/pyp-topics/src/contexts_corpus.hh
parent 088725c4708e83343154d1bed9dee18286446eaf (diff)
Added contexts_corpus for reading text data files.
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@36 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/src/contexts_corpus.hh')
-rw-r--r-- gi/pyp-topics/src/contexts_corpus.hh 39
1 files changed, 39 insertions, 0 deletions
diff --git a/gi/pyp-topics/src/contexts_corpus.hh b/gi/pyp-topics/src/contexts_corpus.hh
new file mode 100644
index 00000000..e680cef5
--- /dev/null
+++ b/gi/pyp-topics/src/contexts_corpus.hh
@@ -0,0 +1,39 @@
+#ifndef _CONTEXTS_CORPUS_HH
+#define _CONTEXTS_CORPUS_HH
+
+#include <vector>
+#include <string>
+#include <map>
+
+#include <boost/ptr_container/ptr_vector.hpp>
+
+#include "corpus.hh"
+#include "contexts_lexer.h"
+#include "../../../decoder/dict.h"
+
+////////////////////////////////////////////////////////////////
+// ContextsCorpus: Corpus subclass for reading contexts data files (see read_contexts); also holds a term backoff handle and a Dict.
+////////////////////////////////////////////////////////////////
+
+class ContextsCorpus : public Corpus {
+ friend void read_callback(const ContextsLexer::PhraseContextsType&, void*); // free function granted access to the private members; NOTE(review): presumably the lexer's per-record callback - confirm in the .cc
+
+public:
+ typedef boost::ptr_vector<Document>::const_iterator const_iterator; // read-only iterator type over a boost::ptr_vector of Document
+
+public:
+ ContextsCorpus() {} // empty body: no explicit member initialisation is performed here
+ virtual ~ContextsCorpus() {} // virtual, empty body
+
+ unsigned read_contexts(const std::string &filename); // declared here, defined out of line; NOTE(review): meaning of the unsigned result is not visible in this header - presumably a count, confirm
+
+ TermBackoffPtr backoff_index() { // accessor: returns the stored m_backoff by value
+ return m_backoff;
+ }
+
+private:
+ TermBackoffPtr m_backoff; // value handed out by backoff_index(); nothing in this header assigns it
+ Dict m_dict; // Dict type comes from decoder/dict.h; not referenced elsewhere in this header - presumably filled while reading, confirm
+};
+
+#endif // _CONTEXTS_CORPUS_HH