summaryrefslogtreecommitdiff
path: root/klm/lm/search_hashed.cc
diff options
context:
space:
mode:
authorKenneth Heafield <kenlm@kheafield.com>2011-06-26 18:40:15 -0400
committerKenneth Heafield <kenlm@kheafield.com>2011-06-26 18:40:15 -0400
commit205893513c8343fdc55789e427fab4c8b536dc12 (patch)
tree67fdaa819488e231b5d70b2227527510571f2108 /klm/lm/search_hashed.cc
parent9366fc1ce04385290722bd703933bf0c1c166671 (diff)
Quantization
Diffstat (limited to 'klm/lm/search_hashed.cc')
-rw-r--r--klm/lm/search_hashed.cc38
1 files changed, 29 insertions, 9 deletions
diff --git a/klm/lm/search_hashed.cc b/klm/lm/search_hashed.cc
index eaad59ab..c56ba7b8 100644
--- a/klm/lm/search_hashed.cc
+++ b/klm/lm/search_hashed.cc
@@ -80,6 +80,21 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(
} // namespace
namespace detail {
+
+template <class MiddleT, class LongestT> uint8_t *TemplateHashedSearch<MiddleT, LongestT>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
+ std::size_t allocated = Unigram::Size(counts[0]);
+ unigram = Unigram(start, allocated);
+ start += allocated;
+ for (unsigned int n = 2; n < counts.size(); ++n) {
+ allocated = Middle::Size(counts[n - 1], config.probing_multiplier);
+ middle_.push_back(Middle(start, allocated));
+ start += allocated;
+ }
+ allocated = Longest::Size(counts.back(), config.probing_multiplier);
+ longest = Longest(start, allocated);
+ start += allocated;
+ return start;
+}
template <class MiddleT, class LongestT> template <class Voc> void TemplateHashedSearch<MiddleT, LongestT>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, Voc &vocab, Backing &backing) {
// TODO: fix sorted.
@@ -92,15 +107,15 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe
try {
if (counts.size() > 2) {
- ReadNGrams(f, 2, counts[1], vocab, middle, ActivateUnigram(unigram.Raw()), middle[0], warn);
+ ReadNGrams(f, 2, counts[1], vocab, middle_, ActivateUnigram(unigram.Raw()), middle_[0], warn);
}
for (unsigned int n = 3; n < counts.size(); ++n) {
- ReadNGrams(f, n, counts[n-1], vocab, middle, ActivateLowerMiddle<Middle>(middle[n-3]), middle[n-2], warn);
+ ReadNGrams(f, n, counts[n-1], vocab, middle_, ActivateLowerMiddle<Middle>(middle_[n-3]), middle_[n-2], warn);
}
if (counts.size() > 2) {
- ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateLowerMiddle<Middle>(middle.back()), longest, warn);
+ ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle_, ActivateLowerMiddle<Middle>(middle_.back()), longest, warn);
} else {
- ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateUnigram(unigram.Raw()), longest, warn);
+ ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle_, ActivateUnigram(unigram.Raw()), longest, warn);
}
} catch (util::ProbingSizeException &e) {
UTIL_THROW(util::ProbingSizeException, "Avoid pruning n-grams like \"bar baz quux\" when \"foo bar baz quux\" is still in the model. KenLM will work when this pruning happens, but the probing model assumes these events are rare enough that using blank space in the probing hash table will cover all of them. Increase probing_multiplier (-p to build_binary) to add more blank spaces.\n");
@@ -108,13 +123,18 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe
ReadEnd(f);
}
-template void TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, ProbingVocabulary &vocab, Backing &backing);
-template void TemplateHashedSearch<SortedHashedSearch::Middle, SortedHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, SortedVocabulary &vocab, Backing &backing);
-
-SortedHashedSearch::SortedHashedSearch() {
- UTIL_THROW(util::Exception, "Sorted is broken at the moment, sorry");
+template <class MiddleT, class LongestT> void TemplateHashedSearch<MiddleT, LongestT>::LoadedBinary() {
+ unigram.LoadedBinary();
+ for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) {
+ i->LoadedBinary();
+ }
+ longest.LoadedBinary();
}
+template class TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>;
+
+template void TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, ProbingVocabulary &vocab, Backing &backing);
+
} // namespace detail
} // namespace ngram
} // namespace lm