summaryrefslogtreecommitdiff
path: root/utils/dict.h
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-02-13 22:41:46 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-02-13 22:41:46 -0500
commit a6ce17719ccd39d2537dc6155adaab7184a313a4 (patch)
tree 28962ccb51f1985b7cd726f6d126c4e9f0d071f0 /utils/dict.h
parent d69cee7a46d63b585e319d93429df27717e5043c (diff)
phrasinator v1
Diffstat (limited to 'utils/dict.h')
-rw-r--r-- utils/dict.h 24
1 files changed, 24 insertions, 0 deletions
diff --git a/utils/dict.h b/utils/dict.h
index 348a97e3..75ea3def 100644
--- a/utils/dict.h
+++ b/utils/dict.h
@@ -21,6 +21,30 @@ class Dict {
inline int max() const { return words_.size(); }
// Returns true when x is a token separator. Only the space character and the
// horizontal tab qualify; every other character (including '\n' and '\r')
// is treated as part of a token.
static bool is_ws(char x) {
  switch (x) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
+
+ inline void ConvertWhitespaceDelimitedLine(const std::string& line, std::vector<int>* out) {
+ size_t cur = 0;
+ size_t last = 0;
+ int state = 0;
+ out->clear();
+ while(cur < line.size()) {
+ if (is_ws(line[cur++])) {
+ if (state == 0) continue;
+ out->push_back(Convert(line.substr(last, cur - last - 1)));
+ state = 0;
+ } else {
+ if (state == 1) continue;
+ last = cur - 1;
+ state = 1;
+ }
+ }
+ if (state == 1)
+ out->push_back(Convert(line.substr(last, cur - last)));
+ }
+
inline WordID Convert(const std::string& word, bool frozen = false) {
Map::iterator i = d_.find(word);
if (i == d_.end()) {