From eecc4cbf5a6e28f910130cd4a58444e9d4701ea9 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 27 Jan 2014 17:42:19 -0800 Subject: KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591 --- klm/util/tokenize_piece.hh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'klm/util/tokenize_piece.hh') diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh index a588c3fc..24eae8fb 100644 --- a/klm/util/tokenize_piece.hh +++ b/klm/util/tokenize_piece.hh @@ -58,6 +58,23 @@ class AnyCharacter { StringPiece chars_; }; +class BoolCharacter { + public: + BoolCharacter() {} + + explicit BoolCharacter(const bool *delimiter) { delimiter_ = delimiter; } + + StringPiece Find(const StringPiece &in) const { + for (const char *i = in.data(); i != in.data() + in.size(); ++i) { + if (delimiter_[static_cast(*i)]) return StringPiece(i, 1); + } + return StringPiece(in.data() + in.size(), 0); + } + + private: + const bool *delimiter_; +}; + class AnyCharacterLast { public: AnyCharacterLast() {} -- cgit v1.2.3