diff options
author | armatthews <armatthe@cmu.edu> | 2014-10-13 14:59:23 -0400 |
---|---|---|
committer | armatthews <armatthe@cmu.edu> | 2014-10-13 14:59:23 -0400 |
commit | b26cda84e05d4523eee069234a975a0153bf8608 (patch) | |
tree | 61c9da4f8dd6070f27c8e81812a76fc0a8cf2d8d /klm/lm/bhiksha.hh | |
parent | cd7bc67f475fdfd07fba003ac4cca40e83944740 (diff) | |
parent | b1ed81ef3216b212295afa76c5d20a56fb647204 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/lm/bhiksha.hh')
-rw-r--r-- | klm/lm/bhiksha.hh | 29 |
1 file changed, 20 insertions, 9 deletions
diff --git a/klm/lm/bhiksha.hh b/klm/lm/bhiksha.hh index 350571a6..134beb2f 100644 --- a/klm/lm/bhiksha.hh +++ b/klm/lm/bhiksha.hh @@ -10,17 +10,19 @@ * Currently only used for next pointers. */ -#ifndef LM_BHIKSHA__ -#define LM_BHIKSHA__ - -#include <stdint.h> -#include <assert.h> +#ifndef LM_BHIKSHA_H +#define LM_BHIKSHA_H #include "lm/model_type.hh" #include "lm/trie.hh" #include "util/bit_packing.hh" #include "util/sorted_uniform.hh" +#include <algorithm> + +#include <stdint.h> +#include <assert.h> + namespace lm { namespace ngram { struct Config; @@ -73,15 +75,24 @@ class ArrayBhiksha { ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config); void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const { - const uint64_t *begin_it = util::BinaryBelow(util::IdentityAccessor<uint64_t>(), offset_begin_, offset_end_, index); + // Some assertions are commented out because they are expensive. + // assert(*offset_begin_ == 0); + // std::upper_bound returns the first element that is greater. Want the + // last element that is <= to the index. + const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1; + // Since *offset_begin_ == 0, the position should be in range. 
+ // assert(begin_it >= offset_begin_); const uint64_t *end_it; - for (end_it = begin_it; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {} + for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {} + // assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1)); --end_it; + // assert(end_it >= begin_it); out.begin = ((begin_it - offset_begin_) << next_inline_.bits) | util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask); out.end = ((end_it - offset_begin_) << next_inline_.bits) | util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask); - //assert(out.end >= out.begin); + // If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052 + assert(out.end >= out.begin); } void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) { @@ -109,4 +120,4 @@ class ArrayBhiksha { } // namespace ngram } // namespace lm -#endif // LM_BHIKSHA__ +#endif // LM_BHIKSHA_H |