summaryrefslogtreecommitdiff
path: root/klm/util/tokenize_piece_test.cc
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2011-11-07 18:10:00 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2011-11-07 18:10:00 -0500
commit70c65e2a4df480f16a5f43de71024c4f36659b8e (patch)
tree7807b86743760c7c1c78e066ddf26ed91cbd6d3d /klm/util/tokenize_piece_test.cc
parent9ed514b2d10c9507a8399623306582da45b78800 (diff)
parent9b9d565a9300b3a2b80bbb952bbf88dd36860ce2 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/util/tokenize_piece_test.cc')
-rw-r--r--klm/util/tokenize_piece_test.cc94
1 files changed, 94 insertions, 0 deletions
diff --git a/klm/util/tokenize_piece_test.cc b/klm/util/tokenize_piece_test.cc
new file mode 100644
index 00000000..e07ebcf5
--- /dev/null
+++ b/klm/util/tokenize_piece_test.cc
@@ -0,0 +1,94 @@
+#include "util/tokenize_piece.hh"
+#include "util/string_piece.hh"
+
+#define BOOST_TEST_MODULE TokenIteratorTest
+#include <boost/test/unit_test.hpp>
+
+#include <iostream>
+
+namespace util {
+namespace {
+
+BOOST_AUTO_TEST_CASE(simple) {
+ PieceIterator<' '> it("single spaced words.");
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("single"), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("spaced"), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("words."), *it);
+ ++it;
+ BOOST_CHECK(!it);
+}
+
+BOOST_AUTO_TEST_CASE(null_delimiter) {
+ const char str[] = "\0first\0\0second\0\0\0third\0fourth\0\0\0";
+ PieceIterator<'\0'> it(StringPiece(str, sizeof(str) - 1));
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("first"), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("second"), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("third"), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece("fourth"), *it);
+ ++it;
+ BOOST_CHECK(!it);
+}
+
+BOOST_AUTO_TEST_CASE(null_entries) {
+ const char str[] = "\0split\0\0 \0me\0 ";
+ PieceIterator<' '> it(StringPiece(str, sizeof(str) - 1));
+ BOOST_REQUIRE(it);
+ const char first[] = "\0split\0\0";
+ BOOST_CHECK_EQUAL(StringPiece(first, sizeof(first) - 1), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ const char second[] = "\0me\0";
+ BOOST_CHECK_EQUAL(StringPiece(second, sizeof(second) - 1), *it);
+ ++it;
+ BOOST_CHECK(!it);
+}
+
+/*BOOST_AUTO_TEST_CASE(pipe_pipe_none) {
+ const char str[] = "nodelimit at all";
+ TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece(str), *it);
+ ++it;
+ BOOST_CHECK(!it);
+}
+BOOST_AUTO_TEST_CASE(pipe_pipe_two) {
+ const char str[] = "|||";
+ TokenIter<MultiCharacter> it(str, MultiCharacter("|||"));
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece(), *it);
+ ++it;
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece(), *it);
+ ++it;
+ BOOST_CHECK(!it);
+}
+
+BOOST_AUTO_TEST_CASE(remove_empty) {
+ const char str[] = "|||";
+ TokenIter<MultiCharacter, true> it(str, MultiCharacter("|||"));
+ BOOST_CHECK(!it);
+}*/
+
+BOOST_AUTO_TEST_CASE(remove_empty_keep) {
+ const char str[] = " |||";
+ TokenIter<MultiCharacter, true> it(str, MultiCharacter("|||"));
+ BOOST_REQUIRE(it);
+ BOOST_CHECK_EQUAL(StringPiece(" "), *it);
+ ++it;
+ BOOST_CHECK(!it);
+}
+
+} // namespace
+} // namespace util