summary | refs | log | tree | commit | diff
path: root/gi/clda/src/tdict.h
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 22:31:28 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 22:31:28 +0000
commit fcd49d500e2f07b084597cd72c53568ac46ef854 (patch)
tree f7e91d0b409785e1938d6dd6df0b01d5c251c36f /gi/clda/src/tdict.h
parent efe0d24fa7dbca47825638a52f51977456153bd0 (diff)
chris's crappy lda
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@6 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/clda/src/tdict.h')
-rw-r--r-- gi/clda/src/tdict.h 49
1 files changed, 49 insertions, 0 deletions
diff --git a/gi/clda/src/tdict.h b/gi/clda/src/tdict.h
new file mode 100644
index 00000000..97f145a1
--- /dev/null
+++ b/gi/clda/src/tdict.h
@@ -0,0 +1,49 @@
+#ifndef _TDICT_H_
+#define _TDICT_H_
+
+#include <string>
+#include <vector>
+#include "wordid.h"
+#include "dict.h"
+
+class Vocab;
+
+struct TD {
+ // Static-only wrapper that maps between strings and WordIDs through one shared Dict.
+ static Dict dict_;
+ static std::string empty; // prefix placed before the first token in GetString(); presumably "" (defined elsewhere, confirm)
+ static std::string space; // prefix placed before every later token in GetString(); presumably " " (defined elsewhere, confirm)
+ // Returns the string forms of the ids in `str` concatenated in order: `empty` before the first, `space` before each of the rest.
+ static std::string GetString(const std::vector<WordID>& str) {
+ std::string res;
+ for (std::vector<WordID>::const_iterator i = str.begin(); i != str.end(); ++i)
+ res += (i == str.begin() ? empty : space) + TD::Convert(*i);
+ return res;
+ }
+ // Splits `sent` on spaces and tabs and stores the id of each non-empty token in `*ids`, replacing its previous contents.
+ static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids) {
+ std::string s = sent; // mutable copy: delimiters are overwritten with NUL below
+ int last = 0; // start index of the token currently being scanned
+ ids->clear();
+ for (int i=0; i < s.size(); ++i) // NOTE(review): signed int compared against the unsigned s.size()
+ if (s[i] == 32 || s[i] == '\t') { // 32 is the ASCII code for a space
+ s[i]=0; // terminate the token so &s[last] reads as a C string
+ if (last != i) { // skip empty tokens produced by leading or consecutive delimiters
+ ids->push_back(Convert(&s[last])); // char* converts implicitly to std::string, ending at the NUL just written
+ }
+ last = i + 1;
+ }
+ if (last != s.size()) // a final token remains that had no delimiter after it
+ ids->push_back(Convert(&s[last])); // relies on the buffer being NUL-terminated at s.size()
+ }
+ // Forwards to dict_.Convert(s) and returns its WordID; how unseen strings are handled is decided by Dict (not visible here).
+ static WordID Convert(const std::string& s) {
+ return dict_.Convert(s);
+ }
+ // Forwards to dict_.Convert(w) and returns the resulting string reference; its lifetime is governed by Dict (not visible here).
+ static const std::string& Convert(const WordID& w) {
+ return dict_.Convert(w);
+ }
+};
+
+#endif