From bc95fedbaa083d557840db6ac2cbf14e2a3eccce Mon Sep 17 00:00:00 2001
From: Kenneth Heafield
Date: Fri, 20 May 2011 16:19:04 -0400
Subject: kenlm update including being nicer to NFS

---
 klm/lm/search_hashed.cc | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'klm/lm/search_hashed.cc')

diff --git a/klm/lm/search_hashed.cc b/klm/lm/search_hashed.cc
index bb3b955a..eaad59ab 100644
--- a/klm/lm/search_hashed.cc
+++ b/klm/lm/search_hashed.cc
@@ -48,7 +48,7 @@ class ActivateUnigram {
     ProbBackoff *modify_;
 };

-template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(util::FilePiece &f, const unsigned int n, const size_t count, const Voc &vocab, std::vector<Middle> &middle, Activate activate, Store &store) {
+template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(util::FilePiece &f, const unsigned int n, const size_t count, const Voc &vocab, std::vector<Middle> &middle, Activate activate, Store &store, PositiveProbWarn &warn) {
   ReadNGramHeader(f, n);

   ProbBackoff blank;
@@ -61,7 +61,7 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(
   typename Store::Packing::Value value;
   typename Middle::ConstIterator found;
   for (size_t i = 0; i < count; ++i) {
-    ReadNGram(f, n, vocab, vocab_ids, value);
+    ReadNGram(f, n, vocab, vocab_ids, value, warn);
     keys[0] = detail::CombineWordHash(static_cast<uint64_t>(*vocab_ids), vocab_ids[1]);
     for (unsigned int h = 1; h < n - 1; ++h) {
       keys[h] = detail::CombineWordHash(keys[h-1], vocab_ids[h+1]);
@@ -85,20 +85,22 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe
   // TODO: fix sorted.
   SetupMemory(GrowForSearch(config, Size(counts, config), backing), counts, config);

-  Read1Grams(f, counts[0], vocab, unigram.Raw());
+  PositiveProbWarn warn(config.positive_log_probability);
+
+  Read1Grams(f, counts[0], vocab, unigram.Raw(), warn);
   CheckSpecials(config, vocab);

   try {
     if (counts.size() > 2) {
-      ReadNGrams(f, 2, counts[1], vocab, middle, ActivateUnigram(unigram.Raw()), middle[0]);
+      ReadNGrams(f, 2, counts[1], vocab, middle, ActivateUnigram(unigram.Raw()), middle[0], warn);
     }
     for (unsigned int n = 3; n < counts.size(); ++n) {
-      ReadNGrams(f, n, counts[n-1], vocab, middle, ActivateLowerMiddle<Middle>(middle[n-3]), middle[n-2]);
+      ReadNGrams(f, n, counts[n-1], vocab, middle, ActivateLowerMiddle<Middle>(middle[n-3]), middle[n-2], warn);
     }
     if (counts.size() > 2) {
-      ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateLowerMiddle<Middle>(middle.back()), longest);
+      ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateLowerMiddle<Middle>(middle.back()), longest, warn);
     } else {
-      ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateUnigram(unigram.Raw()), longest);
+      ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateUnigram(unigram.Raw()), longest, warn);
     }
   } catch (util::ProbingSizeException &e) {
     UTIL_THROW(util::ProbingSizeException, "Avoid pruning n-grams like \"bar baz quux\" when \"foo bar baz quux\" is still in the model. KenLM will work when this pruning happens, but the probing model assumes these events are rare enough that using blank space in the probing hash table will cover all of them. Increase probing_multiplier (-p to build_binary) to add more blank spaces.\n");
--
cgit v1.2.3
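
Note on the warn parameter threaded through the readers above: an ARPA file can contain entries with positive log10 probability, and the PositiveProbWarn object (declared in kenlm's lm/read_arpa.hh) decides, based on Config::positive_log_probability, whether to throw, complain once to stderr, or stay silent. The sketch below is a minimal illustration of that warn-once pattern, not kenlm's actual implementation; the WarningAction values mirror lm/config.hh, but the class body, member names, and message text here are assumptions.

    #include <iostream>
    #include <stdexcept>

    // Mirrors kenlm's WarningAction (lm/config.hh); values assumed from that header.
    enum WarningAction { THROW_UP, COMPLAIN, SILENT };

    // Illustrative stand-in for PositiveProbWarn; not the real class.
    class PositiveProbWarnSketch {
      public:
        explicit PositiveProbWarnSketch(WarningAction action)
          : action_(action), complained_(false) {}

        // Readers call this when an entry's log10 probability is > 0.
        void Warn(float prob) {
          switch (action_) {
            case THROW_UP:
              throw std::runtime_error("positive log probability in ARPA file");
            case COMPLAIN:
              if (!complained_) {  // warn once per load, not once per n-gram
                std::cerr << "Positive log probability " << prob
                          << " in ARPA file; treating as 0." << std::endl;
                complained_ = true;
              }
              break;
            case SILENT:
              break;
          }
        }

      private:
        WarningAction action_;
        bool complained_;  // set after the first complaint
    };

Constructing one such object in InitializeFromARPA and passing it by reference to Read1Grams and ReadNGrams, as the patch does, is what lets the warning state be shared across all call sites instead of re-checked at each one.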