diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2010-12-13 16:18:34 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2010-12-13 16:18:34 -0500 |
commit | be98f29f51350c24136c191f01af3fbfe340ef78 (patch) | |
tree | 2e104152110ca76b527147458050a41934e031f2 /klm/lm/model.hh | |
parent | 063c0623aaf5dad8d02e5eae5793c123cd7fc3fe (diff) |
new version of kenlm
Diffstat (limited to 'klm/lm/model.hh')
-rw-r--r-- | klm/lm/model.hh | 23 |
1 files changed, 21 insertions, 2 deletions
diff --git a/klm/lm/model.hh b/klm/lm/model.hh index e0eeee17..53e5773d 100644 --- a/klm/lm/model.hh +++ b/klm/lm/model.hh @@ -12,6 +12,8 @@ #include <algorithm> #include <vector> +#include <string.h> + namespace util { class FilePiece; } namespace lm { @@ -21,9 +23,10 @@ namespace ngram { // Having this limit means that State can be // (kMaxOrder - 1) * sizeof(float) bytes instead of // sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead -const std::size_t kMaxOrder = 6; +const unsigned char kMaxOrder = 6; -// This is a POD. +// This is a POD but if you want memcmp to return the same as operator==, call +// ZeroRemaining first. class State { public: bool operator==(const State &other) const { @@ -37,6 +40,22 @@ class State { return true; } + // Three way comparison function. + int Compare(const State &other) const { + if (valid_length_ == other.valid_length_) { + return memcmp(history_, other.history_, valid_length_ * sizeof(WordIndex)); + } + return (valid_length_ < other.valid_length_) ? -1 : 1; + } + + // Call this before using raw memcmp. + void ZeroRemaining() { + for (unsigned char i = valid_length_; i < kMaxOrder - 1; ++i) { + history_[i] = 0; + backoff_[i] = 0.0; + } + } + // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD. // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit. WordIndex history_[kMaxOrder - 1]; |