From 5ef94f59e08d2f25bee8520c4233829207d1c034 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 25 Sep 2011 19:18:36 -0400 Subject: Fix trie sort merging --- klm/lm/trie_sort.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'klm/lm') diff --git a/klm/lm/trie_sort.cc b/klm/lm/trie_sort.cc index 01c4e490..86f28493 100644 --- a/klm/lm/trie_sort.cc +++ b/klm/lm/trie_sort.cc @@ -146,7 +146,7 @@ template void MergeSortedFiles(const std::string &first_name, co ++first; ++second; } } - for (RecordReader &remains = (first ? second : first); remains; ++remains) { + for (RecordReader &remains = (first ? first : second); remains; ++remains) { WriteOrThrow(out_file.get(), remains.Data(), entry_size); } } @@ -191,7 +191,7 @@ void ConvertToSorted(util::FilePiece &f, const SortedVocabulary &vocab, const st assembled << file_prefix << static_cast(order) << "_merge_" << (merge_count++); files.push_back(assembled.str()); MergeSortedFiles(files[0], files[1], files.back(), weights_size, order, ThrowCombine()); - MergeSortedFiles(files[0], files[1], files.back(), 0, order, FirstCombine()); + MergeSortedFiles(files[0] + kContextSuffix, files[1] + kContextSuffix, files.back() + kContextSuffix, 0, order, FirstCombine()); files.pop_front(); files.pop_front(); } -- cgit v1.2.3 From 32288c27a523a1152afa019b9152f4401c3097ce Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 26 Sep 2011 16:54:16 -0400 Subject: Fix trie pointer segfault --- klm/lm/bhiksha.hh | 2 ++ klm/lm/trie.cc | 6 ++---- klm/lm/trie.hh | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'klm/lm') diff --git a/klm/lm/bhiksha.hh b/klm/lm/bhiksha.hh index ff7fe452..bc705959 100644 --- a/klm/lm/bhiksha.hh +++ b/klm/lm/bhiksha.hh @@ -11,6 +11,7 @@ */ #include +#include #include "lm/model_type.hh" #include "lm/trie.hh" @@ -78,6 +79,7 @@ class ArrayBhiksha { util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask); out.end = ((end_it - offset_begin_) << next_inline_.bits) | util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask); + //assert(out.end >= out.begin); } void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) { diff --git a/klm/lm/trie.cc b/klm/lm/trie.cc index 4e60b184..20075bb8 100644 --- a/klm/lm/trie.cc +++ b/klm/lm/trie.cc @@ -91,16 +91,14 @@ template bool BitPackedMiddle::Find if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) { return false; } - uint64_t index = at_pointer; + pointer = at_pointer; at_pointer *= total_bits_; at_pointer += word_bits_; - pointer = at_pointer; - quant_.Read(base_, at_pointer, prob, backoff); at_pointer += quant_.TotalBits(); - bhiksha_.ReadNext(base_, at_pointer, index, total_bits_, range); + bhiksha_.ReadNext(base_, at_pointer, pointer, total_bits_, range); return true; } diff --git a/klm/lm/trie.hh b/klm/lm/trie.hh index a9f5e417..06cc96ac 100644 --- a/klm/lm/trie.hh +++ b/klm/lm/trie.hh @@ -99,10 +99,11 @@ template class BitPackedMiddle : public BitPacked { bool FindNoProb(WordIndex word, float &backoff, NodeRange &range) const; NodeRange ReadEntry(uint64_t pointer, float &prob) { - quant_.ReadProb(base_, pointer, prob); + uint64_t addr = pointer * total_bits_; + addr += word_bits_; + quant_.ReadProb(base_, addr, prob); NodeRange ret; - // pointer/total_bits_ should always round down. - bhiksha_.ReadNext(base_, pointer + quant_.TotalBits(), pointer / total_bits_, total_bits_, ret); + bhiksha_.ReadNext(base_, addr + quant_.TotalBits(), pointer, total_bits_, ret); return ret; } -- cgit v1.2.3