diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-22 22:31:28 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-22 22:31:28 +0000 |
commit | fcd49d500e2f07b084597cd72c53568ac46ef854 (patch) | |
tree | f7e91d0b409785e1938d6dd6df0b01d5c251c36f /gi/clda/src/tdict.h | |
parent | efe0d24fa7dbca47825638a52f51977456153bd0 (diff) |
chris's crappy lda
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@6 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/clda/src/tdict.h')
-rw-r--r-- | gi/clda/src/tdict.h | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/gi/clda/src/tdict.h b/gi/clda/src/tdict.h new file mode 100644 index 00000000..97f145a1 --- /dev/null +++ b/gi/clda/src/tdict.h @@ -0,0 +1,49 @@ +#ifndef _TDICT_H_ +#define _TDICT_H_ + +#include <string> +#include <vector> +#include "wordid.h" +#include "dict.h" + +class Vocab; + +struct TD { + + static Dict dict_; + static std::string empty; + static std::string space; + + static std::string GetString(const std::vector<WordID>& str) { + std::string res; + for (std::vector<WordID>::const_iterator i = str.begin(); i != str.end(); ++i) + res += (i == str.begin() ? empty : space) + TD::Convert(*i); + return res; + } + + static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids) { + std::string s = sent; + int last = 0; + ids->clear(); + for (int i=0; i < s.size(); ++i) + if (s[i] == 32 || s[i] == '\t') { + s[i]=0; + if (last != i) { + ids->push_back(Convert(&s[last])); + } + last = i + 1; + } + if (last != s.size()) + ids->push_back(Convert(&s[last])); + } + + static WordID Convert(const std::string& s) { + return dict_.Convert(s); + } + + static const std::string& Convert(const WordID& w) { + return dict_.Convert(w); + } +}; + +#endif |