From c4ade3091b812ca135ae6520fa7173e1bbf28754 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 25 Jan 2011 22:30:48 +0200 Subject: update kenlm --- klm/lm/ngram_query.cc | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'klm/lm/ngram_query.cc') diff --git a/klm/lm/ngram_query.cc b/klm/lm/ngram_query.cc index 3fa8cb03..d6da02e3 100644 --- a/klm/lm/ngram_query.cc +++ b/klm/lm/ngram_query.cc @@ -6,6 +6,8 @@ #include #include +#include + #include #include @@ -43,35 +45,38 @@ template void Query(const Model &model) { state = model.BeginSentenceState(); float total = 0.0; bool got = false; + unsigned int oov = 0; while (std::cin >> word) { got = true; lm::WordIndex vocab = model.GetVocabulary().Index(word); + if (vocab == 0) ++oov; ret = model.FullScore(state, vocab, out); total += ret.prob; std::cout << word << '=' << vocab << ' ' << static_cast(ret.ngram_length) << ' ' << ret.prob << '\n'; state = out; - if (std::cin.get() == '\n') break; + char c; + while (true) { + c = std::cin.get(); + if (!std::cin) break; + if (c == '\n') break; + if (!isspace(c)) { + std::cin.unget(); + break; + } + } + if (c == '\n') break; } if (!got && !std::cin) break; ret = model.FullScore(state, model.GetVocabulary().EndSentence(), out); total += ret.prob; std::cout << "=" << model.GetVocabulary().EndSentence() << ' ' << static_cast(ret.ngram_length) << ' ' << ret.prob << '\n'; - std::cout << "Total: " << total << '\n'; + std::cout << "Total: " << total << " OOV: " << oov << '\n'; } PrintUsage("After queries:\n"); } -class PrintVocab : public lm::ngram::EnumerateVocab { - public: - void Add(lm::WordIndex index, const StringPiece &str) { - std::cerr << "vocab " << index << ' ' << str << '\n'; - } -}; - template void Query(const char *name) { lm::ngram::Config config; - PrintVocab printer; - config.enumerate_vocab = &printer; Model model(name, config); Query(model); } -- cgit v1.2.3