summaryrefslogtreecommitdiff
path: root/klm/util/tokenize_piece.hh
diff options
context:
space:
mode:
Diffstat (limited to 'klm/util/tokenize_piece.hh')
-rw-r--r--klm/util/tokenize_piece.hh64
1 files changed, 14 insertions, 50 deletions
diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh
index 413bda0b..c7e1c863 100644
--- a/klm/util/tokenize_piece.hh
+++ b/klm/util/tokenize_piece.hh
@@ -1,6 +1,7 @@
#ifndef UTIL_TOKENIZE_PIECE__
#define UTIL_TOKENIZE_PIECE__
+#include "util/exception.hh"
#include "util/string_piece.hh"
#include <boost/iterator/iterator_facade.hpp>
@@ -8,63 +9,25 @@
#include <algorithm>
#include <iostream>
-/* Usage:
- *
- * for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) {
- * std::cout << *i << "\n";
- * }
- *
- */
-
namespace util {
-// Tokenize a StringPiece using an iterator interface. boost::tokenizer doesn't work with StringPiece.
-template <char d> class PieceIterator : public boost::iterator_facade<PieceIterator<d>, const StringPiece, boost::forward_traversal_tag> {
+// Thrown on dereference when out of tokens to parse
+class OutOfTokens : public Exception {
public:
- // Default construct is end, which is also accessed by kEndPieceIterator;
- PieceIterator() {}
-
- explicit PieceIterator(const StringPiece &str)
- : after_(str) {
- increment();
- }
+ OutOfTokens() throw() {}
+ ~OutOfTokens() throw() {}
+};
- bool operator!() const {
- return after_.data() == 0;
- }
- operator bool() const {
- return after_.data() != 0;
- }
+class SingleCharacter {
+ public:
+ explicit SingleCharacter(char delim) : delim_(delim) {}
- static PieceIterator<d> end() {
- return PieceIterator<d>();
+ StringPiece Find(const StringPiece &in) const {
+ return StringPiece(std::find(in.data(), in.data() + in.size(), delim_), 1);
}
private:
- friend class boost::iterator_core_access;
-
- void increment() {
- const char *start = after_.data();
- for (; (start != after_.data() + after_.size()) && (d == *start); ++start) {}
- if (start == after_.data() + after_.size()) {
- // End condition.
- after_.clear();
- return;
- }
- const char *finish = start;
- for (; (finish != after_.data() + after_.size()) && (d != *finish); ++finish) {}
- current_ = StringPiece(start, finish - start);
- after_ = StringPiece(finish, after_.data() + after_.size() - finish);
- }
-
- bool equal(const PieceIterator &other) const {
- return after_.data() == other.after_.data();
- }
-
- const StringPiece &dereference() const { return current_; }
-
- StringPiece current_;
- StringPiece after_;
+ char delim_;
};
class MultiCharacter {
@@ -95,7 +58,7 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
public:
TokenIter() {}
- TokenIter(const StringPiece &str, const Find &finder) : after_(str), finder_(finder) {
+ template <class Construct> TokenIter(const StringPiece &str, const Construct &construct) : after_(str), finder_(construct) {
increment();
}
@@ -130,6 +93,7 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
}
const StringPiece &dereference() const {
+ UTIL_THROW_IF(!current_.data(), OutOfTokens, "Ran out of tokens");
return current_;
}