summaryrefslogtreecommitdiff
path: root/klm/util/tokenize_piece_test.cc
blob: e07ebcf5e77400309ea2c8d9515082e306c317ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include "util/tokenize_piece.hh"
#include "util/string_piece.hh"

#define BOOST_TEST_MODULE TokenIteratorTest
#include <boost/test/unit_test.hpp>

#include <iostream>

namespace util {
namespace {

// Splitting "single spaced words." on ' ' must yield the three words in
// order, after which the iterator tests false.
BOOST_AUTO_TEST_CASE(simple) {
  const char *const expected[] = {"single", "spaced", "words."};
  const unsigned int expected_count = sizeof(expected) / sizeof(expected[0]);

  PieceIterator<' '> it("single spaced words.");
  for (unsigned int i = 0; i < expected_count; ++i) {
    // REQUIRE (not CHECK) so the case stops before dereferencing an
    // iterator that has already run out of tokens.
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(StringPiece(expected[i]), *it);
    ++it;
  }
  BOOST_CHECK(!it);
}

// Uses '\0' as the delimiter. The input has leading, trailing and repeated
// delimiters; the test expects only the four words, with no empty tokens.
BOOST_AUTO_TEST_CASE(null_delimiter) {
  const char input[] = "\0first\0\0second\0\0\0third\0fourth\0\0\0";
  // sizeof - 1 keeps every embedded NUL but drops the literal's terminator.
  const StringPiece text(input, sizeof(input) - 1);

  const char *const expected[] = {"first", "second", "third", "fourth"};
  const unsigned int expected_count = sizeof(expected) / sizeof(expected[0]);

  PieceIterator<'\0'> it(text);
  for (unsigned int i = 0; i < expected_count; ++i) {
    // REQUIRE so the case stops before dereferencing an exhausted iterator.
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(StringPiece(expected[i]), *it);
    ++it;
  }
  BOOST_CHECK(!it);
}

// Splits on ' ' when the input itself contains NUL bytes. The NULs are
// expected to stay inside the returned pieces rather than end or split them.
BOOST_AUTO_TEST_CASE(null_entries) {
  const char input[] = "\0split\0\0 \0me\0 ";
  const char first[] = "\0split\0\0";
  const char second[] = "\0me\0";

  // Explicit lengths (sizeof - 1) are needed because the data contains NULs.
  const StringPiece expected[] = {
    StringPiece(first, sizeof(first) - 1),
    StringPiece(second, sizeof(second) - 1)
  };
  const unsigned int expected_count = sizeof(expected) / sizeof(expected[0]);

  PieceIterator<' '> it(StringPiece(input, sizeof(input) - 1));
  for (unsigned int i = 0; i < expected_count; ++i) {
    // REQUIRE so the case stops before dereferencing an exhausted iterator.
    BOOST_REQUIRE(it);
    BOOST_CHECK_EQUAL(expected[i], *it);
    ++it;
  }
  BOOST_CHECK(!it);
}

/*BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
  const char str[] = "nodelimit at all";
  TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(str), *it);
  ++it;
  BOOST_CHECK(!it);
}
BOOST_AUTO_TEST_CASE(pipe_pipe_two) {
  const char str[] = "|||";
  TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(), *it);
  ++it;
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(StringPiece(), *it);
  ++it;
  BOOST_CHECK(!it);
}

BOOST_AUTO_TEST_CASE(remove_empty) {
  const char str[] = "|||";
  TokenIter<MultiCharacter, true> it(str, MultiCharacter("|||"));
  BOOST_CHECK(!it);
}*/

// With the second template argument set to true, tokenizing " |||" on the
// multi-character delimiter "|||" must yield exactly one token, " ".
BOOST_AUTO_TEST_CASE(remove_empty_keep) {
  const char input[] = " |||";
  const MultiCharacter delimiter("|||");
  const StringPiece expected(" ");

  TokenIter<MultiCharacter, true> it(input, delimiter);
  // REQUIRE so the case stops before dereferencing an exhausted iterator.
  BOOST_REQUIRE(it);
  BOOST_CHECK_EQUAL(expected, *it);

  ++it;
  BOOST_CHECK(!it);
}

} // namespace
} // namespace util