diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-05-16 13:24:08 -0700 |
---|---|---|
committer | Chris Dyer <cdyer@cab.ark.cs.cmu.edu> | 2012-05-26 22:59:54 -0400 |
commit | 2b63fa0755954edf467a2421997eaf72771260cf (patch) | |
tree | ffb22b22540cd59f20f7de6bfed4313f8b946407 /klm/lm/value_build.cc | |
parent | e331ea8e69489cfd727c0ad106c76efa69f3e06c (diff) |
Big kenlm change includes lower order models for probing only. And other stuff.
Diffstat (limited to 'klm/lm/value_build.cc')
-rw-r--r-- | klm/lm/value_build.cc | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/klm/lm/value_build.cc b/klm/lm/value_build.cc new file mode 100644 index 00000000..6124f8da --- /dev/null +++ b/klm/lm/value_build.cc @@ -0,0 +1,58 @@ +#include "lm/value_build.hh" + +#include "lm/model.hh" +#include "lm/read_arpa.hh" + +namespace lm { +namespace ngram { + +template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) { + UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes."); + Config for_lower = config; + for_lower.rest_lower_files.clear(); + + // Unigram models aren't supported, so this is a custom loader. + // TODO: optimize the unigram loading? + { + util::FilePiece uni(config.rest_lower_files[0].c_str()); + std::vector<uint64_t> number; + ReadARPACounts(uni, number); + UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size()); + ReadNGramHeader(uni, 1); + unigrams_.resize(number[0]); + unigrams_[0] = config.unknown_missing_logprob; + PositiveProbWarn warn; + for (uint64_t i = 0; i < number[0]; ++i) { + WordIndex w; + Prob entry; + ReadNGram(uni, 1, vocab, &w, entry, warn); + unigrams_[w] = entry.prob; + } + } + + try { + for (unsigned int i = 2; i < order; ++i) { + models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower)); + UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i); + } + } catch (...) { + for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) { + delete *i; + } + models_.clear(); + throw; + } + + // TODO: force/check same vocab. +} + +template <class Model> LowerRestBuild<Model>::~LowerRestBuild() { + for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) { + delete *i; + } +} + +template class LowerRestBuild<ProbingModel>; + +} // namespace ngram +} // namespace lm |