diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-01-27 17:42:19 -0800 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-01-27 17:42:19 -0800 |
commit | 783c57b2d3312738ddcf992ac55ff750afe7cb47 (patch) | |
tree | c4811dab0d916836b8631f3c7df94f284a490b9b /klm/lm/search_hashed.cc | |
parent | f7e051a05d65ef25c2ada0b84cd82bfb375ef265 (diff) |
KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591
Diffstat (limited to 'klm/lm/search_hashed.cc')
-rw-r--r-- | klm/lm/search_hashed.cc | 33 |
1 files changed, 19 insertions, 14 deletions
diff --git a/klm/lm/search_hashed.cc b/klm/lm/search_hashed.cc index 62275d27..354a56b4 100644 --- a/klm/lm/search_hashed.cc +++ b/klm/lm/search_hashed.cc @@ -204,9 +204,10 @@ template <class Build, class Activate, class Store> void ReadNGrams( namespace detail { template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { - std::size_t allocated = Unigram::Size(counts[0]); - unigram_ = Unigram(start, counts[0], allocated); - start += allocated; + unigram_ = Unigram(start, counts[0]); + start += Unigram::Size(counts[0]); + std::size_t allocated; + middle_.clear(); for (unsigned int n = 2; n < counts.size(); ++n) { allocated = Middle::Size(counts[n - 1], config.probing_multiplier); middle_.push_back(Middle(start, allocated)); @@ -218,9 +219,21 @@ template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, return start; } -template <class Value> void HashedSearch<Value>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, Backing &backing) { - // TODO: fix sorted. - SetupMemory(GrowForSearch(config, vocab.UnkCountChangePadding(), Size(counts, config), backing), counts, config); +/*template <class Value> void HashedSearch<Value>::Relocate(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) { + unigram_ = Unigram(start, counts[0]); + start += Unigram::Size(counts[0]); + for (unsigned int n = 2; n < counts.size(); ++n) { + middle[n-2].Relocate(start); + start += Middle::Size(counts[n - 1], config.probing_multiplier) + } + longest_.Relocate(start); +}*/ + +template <class Value> void HashedSearch<Value>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, BinaryFormat &backing) { + void *vocab_rebase; + void *search_base = backing.GrowForSearch(Size(counts, config), vocab.UnkCountChangePadding(), vocab_rebase); + vocab.Relocate(vocab_rebase); + SetupMemory(reinterpret_cast<uint8_t*>(search_base), counts, config); PositiveProbWarn warn(config.positive_log_probability); Read1Grams(f, counts[0], vocab, unigram_.Raw(), warn); @@ -277,14 +290,6 @@ template <class Value> template <class Build> void HashedSearch<Value>::ApplyBui ReadEnd(f); } -template <class Value> void HashedSearch<Value>::LoadedBinary() { - unigram_.LoadedBinary(); - for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) { - i->LoadedBinary(); - } - longest_.LoadedBinary(); -} - template class HashedSearch<BackoffValue>; template class HashedSearch<RestValue>; |