summary refs log tree commit diff
path: root/klm/util/tokenize_piece.hh
diff options
context:
space:
mode:
author Kenneth Heafield <kenlm@kheafield.com> 2011-09-21 18:23:50 -0400
committer Kenneth Heafield <kenlm@kheafield.com> 2011-09-21 18:23:50 -0400
commit f111672dd611f78656fceb3df3729a290453ef56 (patch)
tree b358908f21eba7c63e0cb51dee879b2e1dba4b87 /klm/util/tokenize_piece.hh
parent 388081290e99fdd6eacc9d761ebfdea69647fa72 (diff)
Updated kenlm. Includes left state support but not the cdec-side use of it. Updated binary format.
Diffstat (limited to 'klm/util/tokenize_piece.hh')
-rw-r--r-- klm/util/tokenize_piece.hh 69
1 files changed, 69 insertions, 0 deletions
diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh
new file mode 100644
index 00000000..ee1c7ab2
--- /dev/null
+++ b/klm/util/tokenize_piece.hh
@@ -0,0 +1,69 @@
+#ifndef UTIL_TOKENIZE_PIECE__
+#define UTIL_TOKENIZE_PIECE__
+
+#include "util/string_piece.hh"
+
+#include <boost/iterator/iterator_facade.hpp>
+
+/* Usage (only ' ' is a delimiter here, so the tokens are "foo", "\r\n" and "bar"):
+ *
+ * for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) {
+ *   std::cout << *i << "\n";
+ * }
+ *
+ */
+
+namespace util {
+
+// Forward iterator over the tokens of a StringPiece, split on the single
+// delimiter character d. Runs of consecutive delimiters are skipped, so empty
+// tokens are never produced. Exists because boost::tokenizer doesn't work with
+// StringPiece.
+template <char d> class PieceIterator : public boost::iterator_facade<PieceIterator<d>, const StringPiece, boost::forward_traversal_tag> {
+ public:
+  // A default-constructed iterator is the end iterator; end() returns one.
+  PieceIterator() {}
+
+  // Start tokenizing str. The iterator is positioned on the first token, or
+  // becomes the end iterator if str contains nothing but delimiters.
+  explicit PieceIterator(const StringPiece &str)
+    : after_(str) {
+    increment();
+  }
+
+  // True when the iterator is exhausted (the unread remainder has null data).
+  bool operator!() const {
+    return after_.data() == 0;
+  }
+  // True while a token is available; enables `for (...; i; ++i)` loops.
+  operator bool() const {
+    return !operator!();
+  }
+
+  // The end iterator, i.e. a default-constructed PieceIterator.
+  static PieceIterator<d> end() {
+    return PieceIterator<d>();
+  }
+
+ private:
+  friend class boost::iterator_core_access;
+
+  // Advance to the next token: skip leading delimiters in the unread
+  // remainder, then take characters up to the next delimiter or the end.
+  void increment() {
+    const char *const limit = after_.data() + after_.size();
+    const char *token = after_.data();
+    while ((token != limit) && (*token == d)) ++token;
+    if (token == limit) {
+      // No token left. The end test checks for null data(), so this relies on
+      // clear() resetting the data pointer (assumed; StringPiece is defined
+      // elsewhere).
+      after_.clear();
+      return;
+    }
+    const char *token_end = token;
+    while ((token_end != limit) && (*token_end != d)) ++token_end;
+    current_ = StringPiece(token, token_end - token);
+    after_ = StringPiece(token_end, limit - token_end);
+  }
+
+  // Iterators compare equal when their unread remainders start at the same
+  // address; every end iterator has a null remainder.
+  bool equal(const PieceIterator &other) const {
+    return other.after_.data() == after_.data();
+  }
+
+  // The token the iterator currently points at.
+  const StringPiece &dereference() const { return current_; }
+
+  // Current token.
+  StringPiece current_;
+  // Unread input following the current token; null data marks the end.
+  StringPiece after_;
+};
+
+} // namespace util
+
+#endif // UTIL_TOKENIZE_PIECE__