summaryrefslogtreecommitdiff
path: root/klm/lm/model.hh
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2010-12-13 16:18:34 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2010-12-13 16:18:34 -0500
commitbe98f29f51350c24136c191f01af3fbfe340ef78 (patch)
tree2e104152110ca76b527147458050a41934e031f2 /klm/lm/model.hh
parent063c0623aaf5dad8d02e5eae5793c123cd7fc3fe (diff)
new version of kenlm
Diffstat (limited to 'klm/lm/model.hh')
-rw-r--r--klm/lm/model.hh23
1 files changed, 21 insertions, 2 deletions
diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index e0eeee17..53e5773d 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -12,6 +12,8 @@
#include <algorithm>
#include <vector>
+#include <string.h>
+
namespace util { class FilePiece; }
namespace lm {
@@ -21,9 +23,10 @@ namespace ngram {
// Having this limit means that State can be
// (kMaxOrder - 1) * sizeof(float) bytes instead of
// sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
-const std::size_t kMaxOrder = 6;
+const unsigned char kMaxOrder = 6;
-// This is a POD.
+// This is a POD but if you want memcmp to return the same as operator==, call
+// ZeroRemaining first.
class State {
public:
bool operator==(const State &other) const {
@@ -37,6 +40,22 @@ class State {
return true;
}
+ // Three way comparison function: returns a negative value, zero, or a positive value. States are ordered by valid_length_ first; ties are broken by memcmp over the first valid_length_ entries of history_. backoff_ is not consulted.
+ int Compare(const State &other) const {
+ if (valid_length_ == other.valid_length_) {
+ return memcmp(history_, other.history_, valid_length_ * sizeof(WordIndex)); // byte-wise order; NOTE(review): may differ from numeric WordIndex order depending on endianness - confirm callers only need a consistent ordering
+ }
+ return (valid_length_ < other.valid_length_) ? -1 : 1; // shorter valid history sorts first
+ }
+
+ // Call this before using raw memcmp. Sets history_[i] and backoff_[i] to zero for every index i in [valid_length_, kMaxOrder - 1), so the unused slots hold a fixed value.
+ void ZeroRemaining() {
+ for (unsigned char i = valid_length_; i < kMaxOrder - 1; ++i) {
+ history_[i] = 0;
+ backoff_[i] = 0.0;
+ }
+ } // NOTE(review): only the array slots are cleared here; whether the struct has padding bytes that a whole-object memcmp would also read cannot be determined from this hunk - confirm
+
// You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
// This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
WordIndex history_[kMaxOrder - 1];