diff options
author | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 16:11:38 -0500 |
---|---|---|
committer | Wu, Ke <wuke@cs.umd.edu> | 2014-12-17 16:11:38 -0500 |
commit | 7468e8d85e99b4619442c7afaf4a0d92870111bb (patch) | |
tree | a6f17da7c69048c8900260b5490bb9d8611be3bb /klm/lm/builder/ngram.hh | |
parent | b6dd5a683db9dda2d634dd2fdb76606819594901 (diff) | |
parent | 1a79175f9a101d46cf27ca921213d5dd9300518f (diff) |
Merge with upstream
Diffstat (limited to 'klm/lm/builder/ngram.hh')
-rw-r--r-- | klm/lm/builder/ngram.hh | 39 |
1 files changed, 32 insertions, 7 deletions
```diff
diff --git a/klm/lm/builder/ngram.hh b/klm/lm/builder/ngram.hh
index f5681516..0472bcb1 100644
--- a/klm/lm/builder/ngram.hh
+++ b/klm/lm/builder/ngram.hh
@@ -1,5 +1,5 @@
-#ifndef LM_BUILDER_NGRAM__
-#define LM_BUILDER_NGRAM__
+#ifndef LM_BUILDER_NGRAM_H
+#define LM_BUILDER_NGRAM_H
 
 #include "lm/weights.hh"
 #include "lm/word_index.hh"
@@ -26,7 +26,7 @@ union Payload {
 
 class NGram {
   public:
-    NGram(void *begin, std::size_t order) 
+    NGram(void *begin, std::size_t order)
       : begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
 
     const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
@@ -38,12 +38,12 @@ class NGram {
       end_ = begin_ + difference;
     }
 
-    // Would do operator++ but that can get confusing for a stream.  
+    // Would do operator++ but that can get confusing for a stream.
     void NextInMemory() {
       ReBase(&Value() + 1);
     }
 
-    // Lower-case in deference to STL.  
+    // Lower-case in deference to STL.
     const WordIndex *begin() const { return begin_; }
     WordIndex *begin() { return begin_; }
     const WordIndex *end() const { return end_; }
@@ -61,7 +61,7 @@ class NGram {
       return order * sizeof(WordIndex) + sizeof(Payload);
     }
     std::size_t TotalSize() const {
-      // Compiler should optimize this.  
+      // Compiler should optimize this.
       return TotalSize(Order());
     }
     static std::size_t OrderFromSize(std::size_t size) {
@@ -69,6 +69,31 @@ class NGram {
       assert(size == TotalSize(ret));
       return ret;
     }
+
+    // manipulate msb to signal that ngram can be pruned
+    /*mjd**********************************************************************/
+
+    bool IsMarked() const {
+      return Value().count >> (sizeof(Value().count) * 8 - 1);
+    }
+
+    void Mark() {
+      Value().count |= (1ul << (sizeof(Value().count) * 8 - 1));
+    }
+
+    void Unmark() {
+      Value().count &= ~(1ul << (sizeof(Value().count) * 8 - 1));
+    }
+
+    uint64_t UnmarkedCount() const {
+      return Value().count & ~(1ul << (sizeof(Value().count) * 8 - 1));
+    }
+
+    uint64_t CutoffCount() const {
+      return IsMarked() ? 0 : UnmarkedCount();
+    }
+
+    /*mjd**********************************************************************/
 
   private:
     WordIndex *begin_, *end_;
@@ -81,4 +106,4 @@ const WordIndex kEOS = 2;
 } // namespace builder
 } // namespace lm
 
-#endif // LM_BUILDER_NGRAM__
+#endif // LM_BUILDER_NGRAM_H
```