diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-02-13 22:41:46 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-02-13 22:41:46 -0500 |
commit | 78f1644ae064dffabc59b1bf7bf3ded3dc3171db (patch) | |
tree | fe7c3116dd942f252173e2d4f8b1695c96783195 /utils/dict.h | |
parent | 8c4a347f5f224b0a50bbb1d0b250ee96aad31f72 (diff) |
phrasinator v1
Diffstat (limited to 'utils/dict.h')
-rw-r--r-- | utils/dict.h | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/utils/dict.h b/utils/dict.h
index 348a97e3..75ea3def 100644
--- a/utils/dict.h
+++ b/utils/dict.h
@@ -21,6 +21,30 @@ class Dict {
 
   inline int max() const { return words_.size(); }
 
+  static bool is_ws(char x) {
+    return (x == ' ' || x == '\t');
+  }
+
+  inline void ConvertWhitespaceDelimitedLine(const std::string& line, std::vector<int>* out) {
+    size_t cur = 0;
+    size_t last = 0;
+    int state = 0;
+    out->clear();
+    while(cur < line.size()) {
+      if (is_ws(line[cur++])) {
+        if (state == 0) continue;
+        out->push_back(Convert(line.substr(last, cur - last - 1)));
+        state = 0;
+      } else {
+        if (state == 1) continue;
+        last = cur - 1;
+        state = 1;
+      }
+    }
+    if (state == 1)
+      out->push_back(Convert(line.substr(last, cur - last)));
+  }
+
   inline WordID Convert(const std::string& word, bool frozen = false) {
     Map::iterator i = d_.find(word);
     if (i == d_.end()) {