summaryrefslogtreecommitdiff
path: root/klm/lm/builder/ngram.hh
diff options
context:
space:
mode:
authorarmatthews <armatthe@cmu.edu>2014-10-13 14:59:23 -0400
committerarmatthews <armatthe@cmu.edu>2014-10-13 14:59:23 -0400
commitb26cda84e05d4523eee069234a975a0153bf8608 (patch)
tree61c9da4f8dd6070f27c8e81812a76fc0a8cf2d8d /klm/lm/builder/ngram.hh
parentcd7bc67f475fdfd07fba003ac4cca40e83944740 (diff)
parentb1ed81ef3216b212295afa76c5d20a56fb647204 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'klm/lm/builder/ngram.hh')
-rw-r--r--klm/lm/builder/ngram.hh39
1 files changed, 32 insertions, 7 deletions
diff --git a/klm/lm/builder/ngram.hh b/klm/lm/builder/ngram.hh
index f5681516..0472bcb1 100644
--- a/klm/lm/builder/ngram.hh
+++ b/klm/lm/builder/ngram.hh
@@ -1,5 +1,5 @@
-#ifndef LM_BUILDER_NGRAM__
-#define LM_BUILDER_NGRAM__
+#ifndef LM_BUILDER_NGRAM_H
+#define LM_BUILDER_NGRAM_H
#include "lm/weights.hh"
#include "lm/word_index.hh"
@@ -26,7 +26,7 @@ union Payload {
class NGram {
public:
- NGram(void *begin, std::size_t order)
+ NGram(void *begin, std::size_t order)
: begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
@@ -38,12 +38,12 @@ class NGram {
end_ = begin_ + difference;
}
- // Would do operator++ but that can get confusing for a stream.
+ // Would do operator++ but that can get confusing for a stream.
void NextInMemory() {
ReBase(&Value() + 1);
}
- // Lower-case in deference to STL.
+ // Lower-case in deference to STL.
const WordIndex *begin() const { return begin_; }
WordIndex *begin() { return begin_; }
const WordIndex *end() const { return end_; }
@@ -61,7 +61,7 @@ class NGram {
return order * sizeof(WordIndex) + sizeof(Payload);
}
std::size_t TotalSize() const {
- // Compiler should optimize this.
+ // Compiler should optimize this.
return TotalSize(Order());
}
static std::size_t OrderFromSize(std::size_t size) {
@@ -69,6 +69,31 @@ class NGram {
assert(size == TotalSize(ret));
return ret;
}
+
+ // Manipulate the most significant bit (MSB) of the count to signal that the n-gram can be pruned.
+ /*mjd**********************************************************************/
+
+ // Reports whether the most significant bit of Value().count is set,
+ // i.e. whether this n-gram carries the pruning mark.
+ // NOTE(review): assumes count is an unsigned integer, so the right shift
+ // leaves only the top bit -- confirm against Payload.
+ bool IsMarked() const {
+   const std::size_t top_bit = sizeof(Value().count) * 8 - 1;
+   return (Value().count >> top_bit) != 0;
+ }
+
+ // Sets the most significant bit of Value().count, flagging this n-gram
+ // as prunable. The remaining bits of the count are left untouched.
+ void Mark() {
+   // Build the mask from a 64-bit one: a plain `1ul` is only 32 bits wide
+   // on LLP64 targets (e.g. 64-bit Windows), where shifting it by 63 is
+   // undefined behaviour.
+   // NOTE(review): assumes count is at most 64 bits wide -- confirm against Payload.
+   Value().count |= (static_cast<uint64_t>(1) << (sizeof(Value().count) * 8 - 1));
+ }
+
+ // Clears the most significant bit of Value().count, removing the pruning
+ // mark. The remaining bits of the count are left untouched.
+ void Unmark() {
+   // Build the mask from a 64-bit one: a plain `1ul` is only 32 bits wide
+   // on LLP64 targets (e.g. 64-bit Windows), where shifting it by 63 is
+   // undefined behaviour.
+   // NOTE(review): assumes count is at most 64 bits wide -- confirm against Payload.
+   Value().count &= ~(static_cast<uint64_t>(1) << (sizeof(Value().count) * 8 - 1));
+ }
+
+ // Returns Value().count with its most significant bit (the pruning mark)
+ // masked off. Does not modify the stored count.
+ uint64_t UnmarkedCount() const {
+   // Build the mask from a 64-bit one: a plain `1ul` is only 32 bits wide
+   // on LLP64 targets (e.g. 64-bit Windows), where shifting it by 63 is
+   // undefined behaviour.
+   // NOTE(review): assumes count is at most 64 bits wide -- confirm against Payload.
+   return Value().count & ~(static_cast<uint64_t>(1) << (sizeof(Value().count) * 8 - 1));
+ }
+
+ // Count to use for cutoff decisions: zero when the n-gram is marked,
+ // otherwise the count with the mark bit masked off.
+ uint64_t CutoffCount() const {
+   if (IsMarked()) {
+     return 0;
+   }
+   return UnmarkedCount();
+ }
+
+ /*mjd**********************************************************************/
private:
WordIndex *begin_, *end_;
@@ -81,4 +106,4 @@ const WordIndex kEOS = 2;
} // namespace builder
} // namespace lm
-#endif // LM_BUILDER_NGRAM__
+#endif // LM_BUILDER_NGRAM_H