Update to faster but less cute search

author: Kenneth Heafield <github@kheafield.com> 2012-10-14 10:46:34 +0100
committer: Kenneth Heafield <github@kheafield.com> 2012-10-14 10:46:34 +0100
commit: 9b99cb844e3e379b557ff8578df27893ce147f1a (patch)
tree: ca60a3b35312cf6d9741cd05ca958714d2c4e17c /klm/lm
parent: 28403a7d3cbca2de743a7d654ffb9e1600ce7c5c (diff)
1 files changed, 37 insertions, 0 deletions
diff --git a/klm/lm/fragment.cc b/klm/lm/fragment.cc
new file mode 100644
index 00000000..0267cd4e
--- /dev/null
+++ b/klm/lm/fragment.cc
@@ -0,0 +1,37 @@
+#include "lm/binary_format.hh"
+#include "lm/model.hh"
+#include "lm/left.hh"
+#include "util/tokenize_piece.hh"
+
+// Loads a Model from name, then prints one RuleScore::Finish() result per line of std::cin.
+template <class Model> void Query(const char *name) {
+  Model loaded(name);
+  lm::ngram::ChartState unused_state;  // RuleScore takes a state argument; this block never reads it.
+  for (std::string sentence; getline(std::cin, sentence); ) {
+    lm::ngram::RuleScore<Model> rule_score(loaded, unused_state);  // Fresh scorer for every line.
+    for (util::TokenIter<util::SingleCharacter, true> word(sentence, ' '); word; ++word) {
+      rule_score.Terminal(loaded.GetVocabulary().Index(*word));
+    }
+    std::cout << rule_score.Finish() << '\n';
+  }
+}
+
+int main(int argc, char *argv[]) {
+  if (argc != 2) {
+    std::cerr << "Expected model file name." << std::endl;
+    return 1;
+  }
+  const char *name = argv[1];
+  lm::ngram::ModelType model_type = lm::ngram::PROBING;
+  lm::ngram::RecognizeBinary(name, model_type);
+  switch (model_type) {
+    case lm::ngram::PROBING:
+      Query<lm::ngram::ProbingModel>(name);
+      break;
+    case lm::ngram::REST_PROBING:
+      Query<lm::ngram::RestProbingModel>(name);
+      break;
+    default:
+      std::cerr << "Model type not supported yet." << std::endl;
+  }
+}
author	Kenneth Heafield <github@kheafield.com>	2012-10-14 10:46:34 +0100
committer	Kenneth Heafield <github@kheafield.com>	2012-10-14 10:46:34 +0100
commit	9b99cb844e3e379b557ff8578df27893ce147f1a (patch)
tree	ca60a3b35312cf6d9741cd05ca958714d2c4e17c /klm/lm
parent	28403a7d3cbca2de743a7d654ffb9e1600ce7c5c (diff)