summaryrefslogtreecommitdiff
path: root/klm/util/tokenize_piece.hh
blob: ee1c7ab2bdb5389de4ec48eec4b6673646f21fe0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#ifndef UTIL_TOKENIZE_PIECE__
#define UTIL_TOKENIZE_PIECE__

#include "util/string_piece.hh"

#include <boost/iterator/iterator_facade.hpp>

/* Usage:
 *
 * for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) {
 *   std::cout << *i << "\n";
 * }
 *
 */

namespace util {

// Tokenize a StringPiece using an iterator interface.  boost::tokenizer doesn't work with StringPiece.
template <char d> class PieceIterator : public boost::iterator_facade<PieceIterator<d>, const StringPiece, boost::forward_traversal_tag> {
  public:
    // A default-constructed iterator acts as the end iterator; end() hands
    // back exactly this.  (Relies on a default StringPiece reporting a null
    // data() pointer.)
    PieceIterator() {}

    // Begin tokenizing str on the delimiter d.  The iterator is advanced
    // once right away, so it either refers to the first token or is already
    // at the end when str holds no token.
    explicit PieceIterator(const StringPiece &str)
      : after_(str) {
        increment();
      }

    // True when the iterator is exhausted.
    bool operator!() const {
      return !after_.data();
    }
    // True while the iterator still refers to a token, which allows
    // "for (PieceIterator<' '> i(text); i; ++i)".
    operator bool() const {
      return !operator!();
    }

    // The past-the-end iterator.
    static PieceIterator<d> end() {
      return PieceIterator<d>();
    }

  private:
    friend class boost::iterator_core_access;

    // Advance to the next token: skip any run of delimiters at the front of
    // the unread text, then take everything up to the next delimiter (or the
    // end of the text) as the current token.
    void increment() {
      const char *const limit = after_.data() + after_.size();

      // Step over leading delimiters.
      const char *token = after_.data();
      while (token != limit && *token == d) {
        ++token;
      }

      if (token == limit) {
        // Nothing but delimiters (or nothing at all) remained.  clear() is
        // relied on to reset data() to null, which is what the bool
        // conversions and equal() test for.
        after_.clear();
        return;
      }

      // Find where the token stops.
      const char *stop = token;
      while (stop != limit && *stop != d) {
        ++stop;
      }

      current_ = StringPiece(token, stop - token);
      // The unread remainder begins at the delimiter that ended the token,
      // or is an empty piece at limit when the token ran to the end.
      after_ = StringPiece(stop, limit - stop);
    }

    // Iterators compare equal when their unread remainders begin at the same
    // address.
    bool equal(const PieceIterator &other) const {
      return other.after_.data() == after_.data();
    }

    // The token most recently produced by increment().
    const StringPiece &dereference() const { return current_; }

    // Token the iterator currently refers to.
    StringPiece current_;
    // Text that has not been tokenized yet.
    StringPiece after_;
};

} // namespace util

#endif // UTIL_TOKENIZE_PIECE__