| field | value | date |
|---|---|---|
| author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-11-10 02:02:04 +0000 |
| committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-11-10 02:02:04 +0000 |
| commit | 15b03336564d5e57e50693f19dd81b45076af5d4 (patch) | |
| tree | c2072893a43f4c75f0ad5ebe3080bfa901faf18f /klm/lm/ngram.cc | |
| parent | 1336aecfe930546f8836ffe65dd5ff78434084eb (diff) | |
new version of klm
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@706 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'klm/lm/ngram.cc')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | klm/lm/ngram.cc | 522 |

1 file changed, 0 insertions(+), 522 deletions(-)
```diff
diff --git a/klm/lm/ngram.cc b/klm/lm/ngram.cc
deleted file mode 100644
index a87c82aa..00000000
--- a/klm/lm/ngram.cc
+++ /dev/null
@@ -1,522 +0,0 @@
-#include "lm/ngram.hh"
-
-#include "lm/exception.hh"
-#include "util/file_piece.hh"
-#include "util/joint_sort.hh"
-#include "util/murmur_hash.hh"
-#include "util/probing_hash_table.hh"
-
-#include <algorithm>
-#include <functional>
-#include <numeric>
-#include <limits>
-#include <string>
-
-#include <cmath>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-namespace lm {
-namespace ngram {
-
-size_t hash_value(const State &state) {
-  return util::MurmurHashNative(state.history_, sizeof(WordIndex) * state.valid_length_);
-}
-
-namespace detail {
-uint64_t HashForVocab(const char *str, std::size_t len) {
-  // This proved faster than Boost's hash in speed trials: total load time Murmur 67090000, Boost 72210000
-  // Chose to use 64A instead of native so binary format will be portable across 64 and 32 bit.
-  return util::MurmurHash64A(str, len, 0);
-}
-
-void Prob::SetBackoff(float to) {
-  UTIL_THROW(FormatLoadException, "Attempt to set backoff " << to << " for the highest order n-gram");
-}
-
-// Normally static initialization is a bad idea but MurmurHash is pure arithmetic, so this is ok.
-const uint64_t kUnknownHash = HashForVocab("<unk>", 5);
-// Sadly some LMs have <UNK>.
-const uint64_t kUnknownCapHash = HashForVocab("<UNK>", 5);
-
-} // namespace detail
-
-SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL) {}
-
-std::size_t SortedVocabulary::Size(std::size_t entries, float ignored) {
-  // Lead with the number of entries.
-  return sizeof(uint64_t) + sizeof(Entry) * entries;
-}
-
-void SortedVocabulary::Init(void *start, std::size_t allocated, std::size_t entries) {
-  assert(allocated >= Size(entries));
-  // Leave space for number of entries.
-  begin_ = reinterpret_cast<Entry*>(reinterpret_cast<uint64_t*>(start) + 1);
-  end_ = begin_;
-  saw_unk_ = false;
-}
-
-WordIndex SortedVocabulary::Insert(const StringPiece &str) {
-  uint64_t hashed = detail::HashForVocab(str);
-  if (hashed == detail::kUnknownHash || hashed == detail::kUnknownCapHash) {
-    saw_unk_ = true;
-    return 0;
-  }
-  end_->key = hashed;
-  ++end_;
-  // This is 1 + the offset where it was inserted to make room for unk.
-  return end_ - begin_;
-}
-
-bool SortedVocabulary::FinishedLoading(detail::ProbBackoff *reorder_vocab) {
-  util::JointSort(begin_, end_, reorder_vocab + 1);
-  SetSpecial(Index("<s>"), Index("</s>"), 0, end_ - begin_ + 1);
-  // Save size.
-  *(reinterpret_cast<uint64_t*>(begin_) - 1) = end_ - begin_;
-  return saw_unk_;
-}
-
-void SortedVocabulary::LoadedBinary() {
-  end_ = begin_ + *(reinterpret_cast<const uint64_t*>(begin_) - 1);
-  SetSpecial(Index("<s>"), Index("</s>"), 0, end_ - begin_ + 1);
-}
-
-namespace detail {
-
-template <class Search> MapVocabulary<Search>::MapVocabulary() {}
-
-template <class Search> void MapVocabulary<Search>::Init(void *start, std::size_t allocated, std::size_t entries) {
-  lookup_ = Lookup(start, allocated);
-  available_ = 1;
-  // Later if available_ != expected_available_ then we can throw UnknownMissingException.
-  saw_unk_ = false;
-}
-
-template <class Search> WordIndex MapVocabulary<Search>::Insert(const StringPiece &str) {
-  uint64_t hashed = HashForVocab(str);
-  // Prevent unknown from going into the table.
-  if (hashed == kUnknownHash || hashed == kUnknownCapHash) {
-    saw_unk_ = true;
-    return 0;
-  } else {
-    lookup_.Insert(Lookup::Packing::Make(hashed, available_));
-    return available_++;
-  }
-}
-
-template <class Search> bool MapVocabulary<Search>::FinishedLoading(ProbBackoff *reorder_vocab) {
-  lookup_.FinishedInserting();
-  SetSpecial(Index("<s>"), Index("</s>"), 0, available_);
-  return saw_unk_;
-}
-
-template <class Search> void MapVocabulary<Search>::LoadedBinary() {
-  lookup_.LoadedBinary();
-  SetSpecial(Index("<s>"), Index("</s>"), 0, available_);
-}
-
-/* All of the entropy is in low order bits and boost::hash does poorly with
- * these.  Odd numbers near 2^64 chosen by mashing on the keyboard.  There is a
- * stable point: 0.  But 0 is <unk> which won't be queried here anyway.
- */
-inline uint64_t CombineWordHash(uint64_t current, const WordIndex next) {
-  uint64_t ret = (current * 8978948897894561157ULL) ^ (static_cast<uint64_t>(next) * 17894857484156487943ULL);
-  return ret;
-}
-
-uint64_t ChainedWordHash(const WordIndex *word, const WordIndex *word_end) {
-  if (word == word_end) return 0;
-  uint64_t current = static_cast<uint64_t>(*word);
-  for (++word; word != word_end; ++word) {
-    current = CombineWordHash(current, *word);
-  }
-  return current;
-}
-
-bool IsEntirelyWhiteSpace(const StringPiece &line) {
-  for (size_t i = 0; i < static_cast<size_t>(line.size()); ++i) {
-    if (!isspace(line.data()[i])) return false;
-  }
-  return true;
-}
-
-void ReadARPACounts(util::FilePiece &in, std::vector<size_t> &number) {
-  number.clear();
-  StringPiece line;
-  if (!IsEntirelyWhiteSpace(line = in.ReadLine())) UTIL_THROW(FormatLoadException, "First line was \"" << line << "\" not blank");
-  if ((line = in.ReadLine()) != "\\data\\") UTIL_THROW(FormatLoadException, "second line was \"" << line << "\" not \\data\\.");
-  while (!IsEntirelyWhiteSpace(line = in.ReadLine())) {
-    if (line.size() < 6 || strncmp(line.data(), "ngram ", 6)) UTIL_THROW(FormatLoadException, "count line \"" << line << "\"doesn't begin with \"ngram \"");
-    // So strtol doesn't go off the end of line.
-    std::string remaining(line.data() + 6, line.size() - 6);
-    char *end_ptr;
-    unsigned long int length = std::strtol(remaining.c_str(), &end_ptr, 10);
-    if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line);
-    if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line);
-    ++end_ptr;
-    const char *start = end_ptr;
-    long int count = std::strtol(start, &end_ptr, 10);
-    if (count < 0) UTIL_THROW(FormatLoadException, "Negative n-gram count " << count);
-    if (start == end_ptr) UTIL_THROW(FormatLoadException, "Couldn't parse n-gram count from " << line);
-    number.push_back(count);
-  }
-}
-
-void ReadNGramHeader(util::FilePiece &in, unsigned int length) {
-  StringPiece line;
-  while (IsEntirelyWhiteSpace(line = in.ReadLine())) {}
-  std::stringstream expected;
-  expected << '\\' << length << "-grams:";
-  if (line != expected.str()) UTIL_THROW(FormatLoadException, "Was expecting n-gram header " << expected.str() << " but got " << line << " instead.");
-}
-
-// Special unigram reader because unigram's data structure is different and because we're inserting vocab words.
-template <class Voc> void Read1Grams(util::FilePiece &f, const size_t count, Voc &vocab, ProbBackoff *unigrams) {
-  ReadNGramHeader(f, 1);
-  for (size_t i = 0; i < count; ++i) {
-    try {
-      float prob = f.ReadFloat();
-      if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability");
-      ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited())];
-      value.prob = prob;
-      switch (f.get()) {
-        case '\t':
-          value.SetBackoff(f.ReadFloat());
-          if ((f.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
-          break;
-        case '\n':
-          value.ZeroBackoff();
-          break;
-        default:
-          UTIL_THROW(FormatLoadException, "Expected tab or newline after unigram");
-      }
-    } catch(util::Exception &e) {
-      e << " in the " << i << "th 1-gram at byte " << f.Offset();
-      throw;
-    }
-  }
-  if (f.ReadLine().size()) UTIL_THROW(FormatLoadException, "Expected blank line after unigrams at byte " << f.Offset());
-}
-
-template <class Voc, class Store> void ReadNGrams(util::FilePiece &f, const unsigned int n, const size_t count, const Voc &vocab, Store &store) {
-  ReadNGramHeader(f, n);
-
-  // vocab ids of words in reverse order
-  WordIndex vocab_ids[n];
-  typename Store::Packing::Value value;
-  for (size_t i = 0; i < count; ++i) {
-    try {
-      value.prob = f.ReadFloat();
-      for (WordIndex *vocab_out = &vocab_ids[n-1]; vocab_out >= vocab_ids; --vocab_out) {
-        *vocab_out = vocab.Index(f.ReadDelimited());
-      }
-      uint64_t key = ChainedWordHash(vocab_ids, vocab_ids + n);
-
-      switch (f.get()) {
-        case '\t':
-          value.SetBackoff(f.ReadFloat());
-          if ((f.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
-          break;
-        case '\n':
-          value.ZeroBackoff();
-          break;
-        default:
-          UTIL_THROW(FormatLoadException, "Expected tab or newline after n-gram");
-      }
-      store.Insert(Store::Packing::Make(key, value));
-    } catch(util::Exception &e) {
-      e << " in the " << i << "th " << n << "-gram at byte " << f.Offset();
-      throw;
-    }
-  }
-
-  if (f.ReadLine().size()) UTIL_THROW(FormatLoadException, "Expected blank line after " << n << "-grams at byte " << f.Offset());
-  store.FinishedInserting();
-}
-
-template <class Search, class VocabularyT> size_t GenericModel<Search, VocabularyT>::Size(const std::vector<size_t> &counts, const Config &config) {
-  if (counts.size() > kMaxOrder) UTIL_THROW(FormatLoadException, "This model has order " << counts.size() << ".  Edit ngram.hh's kMaxOrder to at least this value and recompile.");
-  if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model.");
-  size_t memory_size = VocabularyT::Size(counts[0], config.probing_multiplier);
-  memory_size += sizeof(ProbBackoff) * (counts[0] + 1); // +1 for hallucinate <unk>
-  for (unsigned char n = 2; n < counts.size(); ++n) {
-    memory_size += Middle::Size(counts[n - 1], config.probing_multiplier);
-  }
-  memory_size += Longest::Size(counts.back(), config.probing_multiplier);
-  return memory_size;
-}
-
-template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::SetupMemory(char *base, const std::vector<size_t> &counts, const Config &config) {
-  char *start = base;
-  size_t allocated = VocabularyT::Size(counts[0], config.probing_multiplier);
-  vocab_.Init(start, allocated, counts[0]);
-  start += allocated;
-  unigram_ = reinterpret_cast<ProbBackoff*>(start);
-  start += sizeof(ProbBackoff) * (counts[0] + 1);
-  for (unsigned int n = 2; n < counts.size(); ++n) {
-    allocated = Middle::Size(counts[n - 1], config.probing_multiplier);
-    middle_.push_back(Middle(start, allocated));
-    start += allocated;
-  }
-  allocated = Longest::Size(counts.back(), config.probing_multiplier);
-  longest_ = Longest(start, allocated);
-  start += allocated;
-  if (static_cast<std::size_t>(start - base) != Size(counts, config)) UTIL_THROW(FormatLoadException, "The data structures took " << (start - base) << " but Size says they should take " << Size(counts, config));
-}
-
-const char kMagicBytes[] = "mmap lm http://kheafield.com/code format version 0\n\0";
-struct BinaryFileHeader {
-  char magic[sizeof(kMagicBytes)];
-  float zero_f, one_f, minus_half_f;
-  WordIndex one_word_index, max_word_index;
-  uint64_t one_uint64;
-
-  void SetToReference() {
-    std::memcpy(magic, kMagicBytes, sizeof(magic));
-    zero_f = 0.0; one_f = 1.0; minus_half_f = -0.5;
-    one_word_index = 1;
-    max_word_index = std::numeric_limits<WordIndex>::max();
-    one_uint64 = 1;
-  }
-};
-
-bool IsBinaryFormat(int fd, off_t size) {
-  if (size == util::kBadSize || (size <= static_cast<off_t>(sizeof(BinaryFileHeader)))) return false;
-  // Try reading the header.
-  util::scoped_mmap memory(mmap(NULL, sizeof(BinaryFileHeader), PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0), sizeof(BinaryFileHeader));
-  if (memory.get() == MAP_FAILED) return false;
-  BinaryFileHeader reference_header = BinaryFileHeader();
-  reference_header.SetToReference();
-  if (!memcmp(memory.get(), &reference_header, sizeof(BinaryFileHeader))) return true;
-  if (!memcmp(memory.get(), "mmap lm ", 8)) UTIL_THROW(FormatLoadException, "File looks like it should be loaded with mmap, but the test values don't match.  Was it built on a different machine or with a different compiler?");
-  return false;
-}
-
-std::size_t Align8(std::size_t in) {
-  std::size_t off = in % 8;
-  if (!off) return in;
-  return in + 8 - off;
-}
-
-std::size_t TotalHeaderSize(unsigned int order) {
-  return Align8(sizeof(BinaryFileHeader) + 1 /* order */ + sizeof(uint64_t) * order /* counts */ + sizeof(float) /* probing multiplier */ + 1 /* search_tag */);
-}
-
-void ReadBinaryHeader(const void *from, off_t size, std::vector<size_t> &out, float &probing_multiplier, unsigned char &search_tag) {
-  const char *from_char = reinterpret_cast<const char*>(from);
-  if (size < static_cast<off_t>(1 + sizeof(BinaryFileHeader))) UTIL_THROW(FormatLoadException, "File too short to have count information.");
-  // Skip over the BinaryFileHeader which was read by IsBinaryFormat.
-  from_char += sizeof(BinaryFileHeader);
-  unsigned char order = *reinterpret_cast<const unsigned char*>(from_char);
-  if (size < static_cast<off_t>(TotalHeaderSize(order))) UTIL_THROW(FormatLoadException, "File too short to have full header.");
-  out.resize(static_cast<std::size_t>(order));
-  const uint64_t *counts = reinterpret_cast<const uint64_t*>(from_char + 1);
-  for (std::size_t i = 0; i < out.size(); ++i) {
-    out[i] = static_cast<std::size_t>(counts[i]);
-  }
-  const float *probing_ptr = reinterpret_cast<const float*>(counts + out.size());
-  probing_multiplier = *probing_ptr;
-  search_tag = *reinterpret_cast<const char*>(probing_ptr + 1);
-}
-
-void WriteBinaryHeader(void *to, const std::vector<size_t> &from, float probing_multiplier, char search_tag) {
-  BinaryFileHeader header = BinaryFileHeader();
-  header.SetToReference();
-  memcpy(to, &header, sizeof(BinaryFileHeader));
-  char *out = reinterpret_cast<char*>(to) + sizeof(BinaryFileHeader);
-  *reinterpret_cast<unsigned char*>(out) = static_cast<unsigned char>(from.size());
-  uint64_t *counts = reinterpret_cast<uint64_t*>(out + 1);
-  for (std::size_t i = 0; i < from.size(); ++i) {
-    counts[i] = from[i];
-  }
-  float *probing_ptr = reinterpret_cast<float*>(counts + from.size());
-  *probing_ptr = probing_multiplier;
-  *reinterpret_cast<char*>(probing_ptr + 1) = search_tag;
-}
-
-template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::GenericModel(const char *file, Config config) : mapped_file_(util::OpenReadOrThrow(file)) {
-  const off_t file_size = util::SizeFile(mapped_file_.get());
-
-  std::vector<size_t> counts;
-
-  if (IsBinaryFormat(mapped_file_.get(), file_size)) {
-    memory_.reset(util::MapForRead(file_size, config.prefault, mapped_file_.get()), file_size);
-
-    unsigned char search_tag;
-    ReadBinaryHeader(memory_.begin(), file_size, counts, config.probing_multiplier, search_tag);
-    if (config.probing_multiplier < 1.0) UTIL_THROW(FormatLoadException, "Binary format claims to have a probing multiplier of " << config.probing_multiplier << " which is < 1.0.");
-    if (search_tag != Search::kBinaryTag) UTIL_THROW(FormatLoadException, "The binary file has a different search strategy than the one requested.");
-    size_t memory_size = Size(counts, config);
-
-    char *start = reinterpret_cast<char*>(memory_.get()) + TotalHeaderSize(counts.size());
-    if (memory_size != static_cast<size_t>(memory_.end() - start)) UTIL_THROW(FormatLoadException, "The mmap file " << file << " has size " << file_size << " but " << (memory_size + TotalHeaderSize(counts.size())) << " was expected based on the number of counts and configuration.");
-
-    SetupMemory(start, counts, config);
-    vocab_.LoadedBinary();
-    for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) {
-      i->LoadedBinary();
-    }
-    longest_.LoadedBinary();
-
-  } else {
-    if (config.probing_multiplier <= 1.0) UTIL_THROW(FormatLoadException, "probing multiplier must be > 1.0");
-
-    util::FilePiece f(file, mapped_file_.release(), config.messages);
-    ReadARPACounts(f, counts);
-    size_t memory_size = Size(counts, config);
-    char *start;
-
-    if (config.write_mmap) {
-      // Write out an mmap file.
-      util::MapZeroedWrite(config.write_mmap, TotalHeaderSize(counts.size()) + memory_size, mapped_file_, memory_);
-      WriteBinaryHeader(memory_.get(), counts, config.probing_multiplier, Search::kBinaryTag);
-      start = reinterpret_cast<char*>(memory_.get()) + TotalHeaderSize(counts.size());
-    } else {
-      memory_.reset(util::MapAnonymous(memory_size), memory_size);
-      start = reinterpret_cast<char*>(memory_.get());
-    }
-    SetupMemory(start, counts, config);
-    try {
-      LoadFromARPA(f, counts, config);
-    } catch (FormatLoadException &e) {
-      e << " in file " << file;
-      throw;
-    }
-  }
-
-  // g++ prints warnings unless these are fully initialized.
-  State begin_sentence = State();
-  begin_sentence.valid_length_ = 1;
-  begin_sentence.history_[0] = vocab_.BeginSentence();
-  begin_sentence.backoff_[0] = unigram_[begin_sentence.history_[0]].backoff;
-  State null_context = State();
-  null_context.valid_length_ = 0;
-  P::Init(begin_sentence, null_context, vocab_, counts.size());
-}
-
-template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::LoadFromARPA(util::FilePiece &f, const std::vector<size_t> &counts, const Config &config) {
-  // Read the unigrams.
-  Read1Grams(f, counts[0], vocab_, unigram_);
-  bool saw_unk = vocab_.FinishedLoading(unigram_);
-  if (!saw_unk) {
-    switch(config.unknown_missing) {
-      case Config::THROW_UP:
-        {
-          SpecialWordMissingException e("<unk>");
-          e << " and configuration was set to throw if unknown is missing";
-          throw e;
-        }
-      case Config::COMPLAIN:
-        if (config.messages) *config.messages << "Language model is missing <unk>.  Substituting probability " << config.unknown_missing_prob << "." << std::endl;
-        // There's no break;.  This is by design.
-      case Config::SILENT:
-        // Default probabilities for unknown.
-        unigram_[0].backoff = 0.0;
-        unigram_[0].prob = config.unknown_missing_prob;
-        break;
-    }
-  }
-
-  // Read the n-grams.
-  for (unsigned int n = 2; n < counts.size(); ++n) {
-    ReadNGrams(f, n, counts[n-1], vocab_, middle_[n-2]);
-  }
-  ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab_, longest_);
-  if (std::fabs(unigram_[0].backoff) > 0.0000001) UTIL_THROW(FormatLoadException, "Backoff for unknown word should be zero, but was given as " << unigram_[0].backoff);
-}
-
-/* Ugly optimized function.
- * in_state contains the previous ngram's length and backoff probabilites to
- * be used here.  out_state is populated with the found ngram length and
- * backoffs that the next call will find useful.
- *
- * The search goes in increasing order of ngram length.
- */
-template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search, VocabularyT>::FullScore(
-    const State &in_state,
-    const WordIndex new_word,
-    State &out_state) const {
-
-  FullScoreReturn ret;
-  // This is end pointer passed to SumBackoffs.
-  const ProbBackoff &unigram = unigram_[new_word];
-  if (new_word == 0) {
-    ret.ngram_length = out_state.valid_length_ = 0;
-    // all of backoff.
-    ret.prob = std::accumulate(
-        in_state.backoff_,
-        in_state.backoff_ + in_state.valid_length_,
-        unigram.prob);
-    return ret;
-  }
-  float *backoff_out(out_state.backoff_);
-  *backoff_out = unigram.backoff;
-  ret.prob = unigram.prob;
-  out_state.history_[0] = new_word;
-  if (in_state.valid_length_ == 0) {
-    ret.ngram_length = out_state.valid_length_ = 1;
-    // No backoff because NGramLength() == 0 and unknown can't have backoff.
-    return ret;
-  }
-  ++backoff_out;
-
-  // Ok now we now that the bigram contains known words.  Start by looking it up.
-
-  uint64_t lookup_hash = static_cast<uint64_t>(new_word);
-  const WordIndex *hist_iter = in_state.history_;
-  const WordIndex *const hist_end = hist_iter + in_state.valid_length_;
-  typename std::vector<Middle>::const_iterator mid_iter = middle_.begin();
-  for (; ; ++mid_iter, ++hist_iter, ++backoff_out) {
-    if (hist_iter == hist_end) {
-      // Used history [in_state.history_, hist_end) and ran out.  No backoff.
-      std::copy(in_state.history_, hist_end, out_state.history_ + 1);
-      ret.ngram_length = out_state.valid_length_ = in_state.valid_length_ + 1;
-      // ret.prob was already set.
-      return ret;
-    }
-    lookup_hash = CombineWordHash(lookup_hash, *hist_iter);
-    if (mid_iter == middle_.end()) break;
-    typename Middle::ConstIterator found;
-    if (!mid_iter->Find(lookup_hash, found)) {
-      // Didn't find an ngram using hist_iter.
-      // The history used in the found n-gram is [in_state.history_, hist_iter).
-      std::copy(in_state.history_, hist_iter, out_state.history_ + 1);
-      // Therefore, we found a (hist_iter - in_state.history_ + 1)-gram including the last word.
-      ret.ngram_length = out_state.valid_length_ = (hist_iter - in_state.history_) + 1;
-      ret.prob = std::accumulate(
-          in_state.backoff_ + (mid_iter - middle_.begin()),
-          in_state.backoff_ + in_state.valid_length_,
-          ret.prob);
-      return ret;
-    }
-    *backoff_out = found->GetValue().backoff;
-    ret.prob = found->GetValue().prob;
-  }
-
-  typename Longest::ConstIterator found;
-  if (!longest_.Find(lookup_hash, found)) {
-    // It's an (P::Order()-1)-gram
-    std::copy(in_state.history_, in_state.history_ + P::Order() - 2, out_state.history_ + 1);
-    ret.ngram_length = out_state.valid_length_ = P::Order() - 1;
-    ret.prob += in_state.backoff_[P::Order() - 2];
-    return ret;
-  }
-  // It's an P::Order()-gram
-  // out_state.valid_length_ is still P::Order() - 1 because the next lookup will only need that much.
-  std::copy(in_state.history_, in_state.history_ + P::Order() - 2, out_state.history_ + 1);
-  out_state.valid_length_ = P::Order() - 1;
-  ret.ngram_length = P::Order();
-  ret.prob = found->GetValue().prob;
-  return ret;
-}
-
-template class GenericModel<ProbingSearch, MapVocabulary<ProbingSearch> >;
-template class GenericModel<SortedUniformSearch, SortedVocabulary>;
-} // namespace detail
-} // namespace ngram
-} // namespace lm
```
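For readers skimming the removal: the core lookup trick in the deleted file is the chained word hash that keys both the `Middle` and `Longest` probing tables. Below is a minimal standalone sketch of that arithmetic, not part of the commit itself; the multiplier constants and function bodies are copied verbatim from the ngram.cc shown above, while the `WordIndex` typedef and the example trigram ids are invented for illustration.

```cpp
// Sketch only: reproduces CombineWordHash/ChainedWordHash from the deleted
// ngram.cc so the n-gram key derivation can be run in isolation.
#include <cstdint>
#include <iostream>

typedef unsigned int WordIndex;  // assumed width; matches how the file casts to uint64_t

// Odd multipliers near 2^64, per the original comment; 0 is a fixed point,
// but 0 is <unk>, which is never queried through this path.
inline std::uint64_t CombineWordHash(std::uint64_t current, const WordIndex next) {
  return (current * 8978948897894561157ULL) ^
         (static_cast<std::uint64_t>(next) * 17894857484156487943ULL);
}

std::uint64_t ChainedWordHash(const WordIndex *word, const WordIndex *word_end) {
  if (word == word_end) return 0;
  std::uint64_t current = static_cast<std::uint64_t>(*word);
  for (++word; word != word_end; ++word) current = CombineWordHash(current, *word);
  return current;
}

int main() {
  // ReadNGrams stores vocab ids most-recent-word-first; these ids are made up.
  const WordIndex trigram[] = {42, 7, 3};
  std::cout << ChainedWordHash(trigram, trigram + 3) << "\n";
  return 0;
}
```

This is also why `FullScore` can extend `lookup_hash` one history word at a time as it walks the middle tables: each longer n-gram's key is derived incrementally from the shorter one's.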
