summary refs log tree commit diff
path: root/klm/util/tokenize_piece.hh
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-01-28 00:18:37 -0500
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-01-28 00:18:37 -0500
commit 7f69662660746eb8559bd3f041bdc4184f639fad (patch)
tree 5336b9626c8ce12dbd366cc637fec0987742bd47 /klm/util/tokenize_piece.hh
parent dba017629ff32bbd0af8770ddee88082bb626bee (diff)
parent eecc4cbf5a6e28f910130cd4a58444e9d4701ea9 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/util/tokenize_piece.hh')
-rw-r--r-- klm/util/tokenize_piece.hh 17
1 files changed, 17 insertions, 0 deletions
diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh
index a588c3fc..24eae8fb 100644
--- a/klm/util/tokenize_piece.hh
+++ b/klm/util/tokenize_piece.hh
@@ -58,6 +58,23 @@ class AnyCharacter {
StringPiece chars_;
};
+// Delimiter finder driven by a caller-supplied lookup table: a byte counts as
+// a delimiter when the table entry at its unsigned char value is true.
+// The table pointer is stored, not copied, so the table must stay alive while
+// this object is used and must have an entry for every byte value that can
+// occur in the input.
+class BoolCharacter {
+  public:
+    // No table attached: Find() reports that no delimiter is present.
+    BoolCharacter() : delimiter_(0) {}
+
+    // delimiter: lookup table indexed by unsigned char value (see class note).
+    explicit BoolCharacter(const bool *delimiter) : delimiter_(delimiter) {}
+
+    // Returns a one-character piece located at the first delimiter in `in`,
+    // or an empty piece positioned at the end of `in` when none is found.
+    StringPiece Find(const StringPiece &in) const {
+      const char *const end = in.data() + in.size();
+      // Without a table there is nothing to index; treat that as "no
+      // delimiter" instead of reading through a null pointer.
+      if (!delimiter_) return StringPiece(end, 0);
+      for (const char *i = in.data(); i != end; ++i) {
+        // Index via unsigned char so a negative plain char cannot produce a
+        // negative table offset.
+        if (delimiter_[static_cast<unsigned char>(*i)]) return StringPiece(i, 1);
+      }
+      return StringPiece(end, 0);
+    }
+
+  private:
+    // Borrowed lookup table; null when default-constructed.
+    const bool *delimiter_;
+};
+
class AnyCharacterLast {
public:
AnyCharacterLast() {}