From 2b63fa0755954edf467a2421997eaf72771260cf Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 16 May 2012 13:24:08 -0700 Subject: Big kenlm change includes lower order models for probing only. And other stuff. --- klm/lm/quantize.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'klm/lm/quantize.cc') diff --git a/klm/lm/quantize.cc b/klm/lm/quantize.cc index a8e0cb21..b58c3f3f 100644 --- a/klm/lm/quantize.cc +++ b/klm/lm/quantize.cc @@ -47,9 +47,7 @@ void SeparatelyQuantize::UpdateConfigFromBinary(int fd, const std::vector(static_cast(start) + 8); +void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) { prob_bits_ = config.prob_bits; backoff_bits_ = config.backoff_bits; // We need the reserved values. @@ -57,25 +55,35 @@ void SeparatelyQuantize::SetupMemory(void *start, const Config &config) { if (config.backoff_bits == 0) UTIL_THROW(ConfigException, "You can't quantize backoff to zero"); if (config.prob_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing probability supports at most 25 bits. Currently you have requested " << static_cast(config.prob_bits) << " bits."); if (config.backoff_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing backoff supports at most 25 bits. Currently you have requested " << static_cast(config.backoff_bits) << " bits."); + // Reserve 8 byte header for bit counts. + actual_base_ = static_cast(base); + float *start = reinterpret_cast(actual_base_ + 8); + for (unsigned char i = 0; i < order - 2; ++i) { + tables_[i][0] = Bins(prob_bits_, start); + start += (1ULL << prob_bits_); + tables_[i][1] = Bins(backoff_bits_, start); + start += (1ULL << backoff_bits_); + } + longest_ = tables_[order - 2][0] = Bins(prob_bits_, start); } void SeparatelyQuantize::Train(uint8_t order, std::vector &prob, std::vector &backoff) { TrainProb(order, prob); // Backoff - float *centers = start_ + TableStart(order) + ProbTableLength(); + float *centers = tables_[order - 2][1].Populate(); *(centers++) = kNoExtensionBackoff; *(centers++) = kExtensionBackoff; MakeBins(backoff, centers, (1ULL << backoff_bits_) - 2); } void SeparatelyQuantize::TrainProb(uint8_t order, std::vector &prob) { - float *centers = start_ + TableStart(order); + float *centers = tables_[order - 2][0].Populate(); MakeBins(prob, centers, (1ULL << prob_bits_)); } void SeparatelyQuantize::FinishedLoading(const Config &config) { - uint8_t *actual_base = reinterpret_cast(start_) - 8; + uint8_t *actual_base = actual_base_; *(actual_base++) = kSeparatelyQuantizeVersion; // version *(actual_base++) = config.prob_bits; *(actual_base++) = config.backoff_bits; -- cgit v1.2.3