summaryrefslogtreecommitdiff
path: root/klm/lm/ngram_test.cc
diff options
context:
space:
mode:
authorredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-10-18 23:24:01 +0000
committerredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-10-18 23:24:01 +0000
commite0ef743090038ee02d656cee11debd2246624ba0 (patch)
treee5e1d32402c8dcb490c574e24c087c56d4cc172e /klm/lm/ngram_test.cc
parent0c2514868f58bbfe422aa275e2905182cf2f57eb (diff)
kenneth's LM preliminary integration
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@681 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'klm/lm/ngram_test.cc')
-rw-r--r--klm/lm/ngram_test.cc91
1 files changed, 91 insertions, 0 deletions
diff --git a/klm/lm/ngram_test.cc b/klm/lm/ngram_test.cc
new file mode 100644
index 00000000..031e0348
--- /dev/null
+++ b/klm/lm/ngram_test.cc
@@ -0,0 +1,91 @@
+#include "lm/ngram.hh"
+
+#include <stdlib.h>
+
+#include <algorithm>
+
+#define BOOST_TEST_MODULE NGramTest
+#include <boost/test/unit_test.hpp>
+
+namespace lm {
+namespace ngram {
+namespace {
+
+#define StartTest(word, ngram, score) \
+ ret = model.FullScore( \
+ state, \
+ model.GetVocabulary().Index(word), \
+ out);\
+ BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
+ BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
+ BOOST_CHECK_EQUAL(std::min<unsigned char>(ngram, 5 - 1), out.valid_length_);
+
+#define AppendTest(word, ngram, score) \
+ StartTest(word, ngram, score) \
+ state = out;
+
+template <class M> void Starters(M &model) {  // Scores three single words, each from the begin-of-sentence state.
+ FullScoreReturn ret;  // assigned by every StartTest expansion
+ Model::State state(model.BeginSentenceState());  // StartTest never assigns to it, so every check starts here
+ Model::State out;  // passed to FullScore as its last argument by StartTest
+ // StartTest is a macro that refers to model, state, out and ret by name; do not rename them.
+ StartTest("looking", 2, -0.4846522);
+ // The next two cases expect shorter matches (length 1 and 0); their expected score is a sum of two terms.
+ // "," probability plus <s> backoff
+ StartTest(",", 1, -1.383514 + -0.4149733);
+ // <unk> probability plus <s> backoff ("this_is_not_found" is presumably absent from the vocabulary)
+ StartTest("this_is_not_found", 0, -1.995635 + -0.4149733);
+}
+
+template <class M> void Continuation(M &model) {  // Scores word sequences, carrying the state from word to word.
+ FullScoreReturn ret;  // assigned by every AppendTest expansion
+ Model::State state(model.BeginSentenceState());  // context for the next word; AppendTest overwrites it with out
+ Model::State out;  // passed to FullScore as its last argument by the macro
+ // AppendTest is a macro that refers to model, state, out and ret by name; do not rename them.
+ AppendTest("looking", 2, -0.484652);
+ AppendTest("on", 3, -0.348837);
+ AppendTest("a", 4, -0.0155266);
+ AppendTest("little", 5, -0.00306122);
+ State preserve = state;  // snapshot after "looking on a little"; NOTE(review): State vs Model::State, presumably the same type -- confirm
+ AppendTest("the", 1, -4.04005);  // expected match length drops from 5 to 1 here
+ AppendTest("biarritz", 1, -1.9889);
+ AppendTest("not_found", 0, -2.29666);  // expected length 0; presumably an out-of-vocabulary word
+ AppendTest("more", 1, -1.20632);
+ AppendTest(".", 2, -0.51363);
+ AppendTest("</s>", 3, -0.0191651);
+ // Go back to the snapshot and score a second continuation of the same prefix: "more loin".
+ state = preserve;
+ AppendTest("more", 5, -0.00181395);  // same word as above, but expected length 5 from the restored state
+ AppendTest("loin", 5, -0.0432557);
+}
+
+// Run Starters and Continuation against a model constructed directly from
+// test.arpa: Model in the *_probing cases, SortedModel in the *_sorted cases.
+BOOST_AUTO_TEST_CASE(starters_probing) {
+ Model probing("test.arpa");
+ Starters(probing);
+}
+
+BOOST_AUTO_TEST_CASE(continuation_probing) {
+ Model probing("test.arpa");
+ Continuation(probing);
+}
+
+BOOST_AUTO_TEST_CASE(starters_sorted) {
+ SortedModel sorted("test.arpa");
+ Starters(sorted);
+}
+
+BOOST_AUTO_TEST_CASE(continuation_sorted) {
+ SortedModel sorted("test.arpa");
+ Continuation(sorted);
+}
+
+// Round trip for Model: construct from test.arpa with Config::write_mmap set
+// to "test.binary" (presumably this makes the model write its binary form to
+// that path -- confirm in lm/ngram.hh), then construct a second Model from
+// test.binary and run both checks on it.
+BOOST_AUTO_TEST_CASE(write_and_read_probing) {
+ Config dump_config;
+ dump_config.write_mmap = "test.binary";
+ {
+  // Scoped so this model is destroyed before test.binary is opened below.
+  Model from_arpa("test.arpa", dump_config);
+ }
+ Model reloaded("test.binary");
+ Starters(reloaded);
+ Continuation(reloaded);
+}
+
+// Round trip for SortedModel: construct from test.arpa with
+// Config::write_mmap set to "test.binary" and Config::prefault enabled, then
+// construct a second SortedModel from test.binary and run both checks on it.
+// NOTE(review): the meaning of write_mmap and prefault is defined outside
+// this file; presumably "write the binary form here" and "load mapped pages
+// eagerly" -- confirm in lm/ngram.hh.
+BOOST_AUTO_TEST_CASE(write_and_read_sorted) {
+ Config dump_config;
+ dump_config.write_mmap = "test.binary";
+ dump_config.prefault = true;
+ {
+  // Scoped so this model is destroyed before test.binary is opened below.
+  SortedModel from_arpa("test.arpa", dump_config);
+ }
+ SortedModel reloaded("test.binary");
+ Starters(reloaded);
+ Continuation(reloaded);
+}
+
+
+} // namespace
+} // namespace ngram
+} // namespace lm