diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-01-28 00:18:37 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-01-28 00:18:37 -0500 |
commit | 0ee2b44c5c0981358ade9ddab1d083bbe1de5daf (patch) | |
tree | 2dc8d9b9efb24b3a3b5c2bcaf0b55df30743d151 /klm/util/tokenize_piece.hh | |
parent | 2ac0704a463d45f0bfe23184a1ea9950d60fd546 (diff) | |
parent | 783c57b2d3312738ddcf992ac55ff750afe7cb47 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/util/tokenize_piece.hh')
-rw-r--r-- | klm/util/tokenize_piece.hh | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh index a588c3fc..24eae8fb 100644 --- a/klm/util/tokenize_piece.hh +++ b/klm/util/tokenize_piece.hh @@ -58,6 +58,23 @@ class AnyCharacter { StringPiece chars_; }; +class BoolCharacter { + public: + BoolCharacter() {} + + explicit BoolCharacter(const bool *delimiter) { delimiter_ = delimiter; } + + StringPiece Find(const StringPiece &in) const { + for (const char *i = in.data(); i != in.data() + in.size(); ++i) { + if (delimiter_[static_cast<unsigned char>(*i)]) return StringPiece(i, 1); + } + return StringPiece(in.data() + in.size(), 0); + } + + private: + const bool *delimiter_; +}; + class AnyCharacterLast { public: AnyCharacterLast() {} |