author    Chris Dyer <cdyer@cs.cmu.edu>  2011-07-05 23:19:54 -0400
committer Chris Dyer <cdyer@cs.cmu.edu>  2011-07-05 23:19:54 -0400
commit    c3e46171f722f6276e2613ea6cb087b07325d794 (patch)
tree      4a289539c4e7a972009dc2f1b680004b959547df /klm/lm/search_hashed.cc
parent    f91319978f6e74e5c4e5701da8fbbacb96a3161e (diff)
parent    59932be2de387ecfcaa81a8387e8f21d5123c050 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/lm/search_hashed.cc')
-rw-r--r--  klm/lm/search_hashed.cc  38
1 file changed, 29 insertions, 9 deletions
diff --git a/klm/lm/search_hashed.cc b/klm/lm/search_hashed.cc
index eaad59ab..c56ba7b8 100644
--- a/klm/lm/search_hashed.cc
+++ b/klm/lm/search_hashed.cc
@@ -80,6 +80,21 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(
} // namespace
namespace detail {
+
+template <class MiddleT, class LongestT> uint8_t *TemplateHashedSearch<MiddleT, LongestT>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
+ std::size_t allocated = Unigram::Size(counts[0]);
+ unigram = Unigram(start, allocated);
+ start += allocated;
+ for (unsigned int n = 2; n < counts.size(); ++n) {
+ allocated = Middle::Size(counts[n - 1], config.probing_multiplier);
+ middle_.push_back(Middle(start, allocated));
+ start += allocated;
+ }
+ allocated = Longest::Size(counts.back(), config.probing_multiplier);
+ longest = Longest(start, allocated);
+ start += allocated;
+ return start;
+}
template <class MiddleT, class LongestT> template <class Voc> void TemplateHashedSearch<MiddleT, LongestT>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, Voc &vocab, Backing &backing) {
// TODO: fix sorted.
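The SetupMemory added in the hunk above carves one contiguous allocation into a unigram block, one middle probing table per order 2 through N-1, and a table for the longest order, returning the first unused byte. The following is a minimal, self-contained sketch of that partitioning pattern; Region, UnigramSize, TableSize, and the 16-byte entry size are simplified stand-ins assumed here for illustration, not KenLM's real Unigram/Middle/Longest classes.

#include <cstddef>
#include <stdint.h>
#include <vector>

namespace sketch {

// Hypothetical fixed layout standing in for the real probing tables.
struct Region {
  Region() : base(NULL), size(0) {}
  Region(uint8_t *b, std::size_t s) : base(b), size(s) {}
  uint8_t *base;
  std::size_t size;
};

// Assumed per-entry sizes; the real classes compute these from their layouts.
inline std::size_t UnigramSize(uint64_t count) { return static_cast<std::size_t>(count) * 16; }
inline std::size_t TableSize(uint64_t count, float multiplier) {
  return static_cast<std::size_t>(count * multiplier) * 16;
}

// Mirrors the shape of SetupMemory: lay regions out back to back and return
// the first byte past everything that was assigned.
uint8_t *Partition(uint8_t *start, const std::vector<uint64_t> &counts,
                   float probing_multiplier,
                   Region &unigram, std::vector<Region> &middle, Region &longest) {
  std::size_t allocated = UnigramSize(counts[0]);
  unigram = Region(start, allocated);                 // order 1
  start += allocated;
  for (unsigned int n = 2; n < counts.size(); ++n) {  // orders 2 .. N-1
    allocated = TableSize(counts[n - 1], probing_multiplier);
    middle.push_back(Region(start, allocated));
    start += allocated;
  }
  allocated = TableSize(counts.back(), probing_multiplier);
  longest = Region(start, allocated);                 // order N
  return start + allocated;
}

} // namespace sketch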
@@ -92,15 +107,15 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe
try {
if (counts.size() > 2) {
- ReadNGrams(f, 2, counts[1], vocab, middle, ActivateUnigram(unigram.Raw()), middle[0], warn);
+ ReadNGrams(f, 2, counts[1], vocab, middle_, ActivateUnigram(unigram.Raw()), middle_[0], warn);
}
for (unsigned int n = 3; n < counts.size(); ++n) {
- ReadNGrams(f, n, counts[n-1], vocab, middle, ActivateLowerMiddle<Middle>(middle[n-3]), middle[n-2], warn);
+ ReadNGrams(f, n, counts[n-1], vocab, middle_, ActivateLowerMiddle<Middle>(middle_[n-3]), middle_[n-2], warn);
}
if (counts.size() > 2) {
- ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateLowerMiddle<Middle>(middle.back()), longest, warn);
+ ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle_, ActivateLowerMiddle<Middle>(middle_.back()), longest, warn);
} else {
- ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle, ActivateUnigram(unigram.Raw()), longest, warn);
+ ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, middle_, ActivateUnigram(unigram.Raw()), longest, warn);
}
} catch (util::ProbingSizeException &e) {
UTIL_THROW(util::ProbingSizeException, "Avoid pruning n-grams like \"bar baz quux\" when \"foo bar baz quux\" is still in the model. KenLM will work when this pruning happens, but the probing model assumes these events are rare enough that using blank space in the probing hash table will cover all of them. Increase probing_multiplier (-p to build_binary) to add more blank spaces.\n");
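The ProbingSizeException message above refers to the slack a probing (open-addressing) table keeps: it is allocated with more buckets than expected entries, and that unfilled space is where entries for pruned n-grams are absorbed, so heavy pruning can exhaust it. As a rough illustration of how the multiplier translates into slack, here is a sketch assuming the table is simply sized as entries times multiplier; this is not KenLM's exact sizing code.

#include <cstddef>
#include <stdint.h>

// Rough illustration only: the table gets probing_multiplier times as many
// buckets as expected entries, and the unfilled buckets are the "blank space"
// the exception message mentions.  Raising -p / probing_multiplier leaves
// more of that slack available.
std::size_t ProbingBuckets(uint64_t expected_entries, float probing_multiplier) {
  // e.g. 1,000,000 entries with a multiplier of 1.5 -> 1,500,000 buckets,
  // i.e. roughly 500,000 empty slots of slack.
  return static_cast<std::size_t>(expected_entries * probing_multiplier);
}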
@@ -108,13 +123,18 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe
ReadEnd(f);
}
-template void TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, ProbingVocabulary &vocab, Backing &backing);
-template void TemplateHashedSearch<SortedHashedSearch::Middle, SortedHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, SortedVocabulary &vocab, Backing &backing);
-
-SortedHashedSearch::SortedHashedSearch() {
- UTIL_THROW(util::Exception, "Sorted is broken at the moment, sorry");
+template <class MiddleT, class LongestT> void TemplateHashedSearch<MiddleT, LongestT>::LoadedBinary() {
+ unigram.LoadedBinary();
+ for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) {
+ i->LoadedBinary();
+ }
+ longest.LoadedBinary();
}
+template class TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>;
+
+template void TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, ProbingVocabulary &vocab, Backing &backing);
+
} // namespace detail
} // namespace ngram
} // namespace lm
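The last hunk replaces per-method explicit instantiations with a single `template class TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>;`, which forces the compiler to emit every ordinary member of that specialization in this translation unit. Below is a small stand-alone sketch of the idiom with hypothetical names; it is not the real class.

#include <vector>

// Member definitions stay in the .cc file; one explicit instantiation at the
// bottom emits all members of the chosen specialization for other translation
// units to link against.
template <class MiddleT> struct ExampleSearch {
  void LoadedBinary();            // defined out of line, below
  std::vector<MiddleT> middle_;
};

template <class MiddleT> void ExampleSearch<MiddleT>::LoadedBinary() {
  for (typename std::vector<MiddleT>::iterator i = middle_.begin(); i != middle_.end(); ++i) {
    // per-table fix-up would go here in the real class
  }
}

struct DummyMiddle {};

// Analogous to:
//   template class TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>;
template struct ExampleSearch<DummyMiddle>;

Note that the diff still keeps an explicit instantiation of InitializeFromARPA: it is a member function template (templated on Voc), and a class-level explicit instantiation does not instantiate member templates.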