summaryrefslogtreecommitdiff
path: root/klm/lm/model.hh
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2010-12-13 16:18:34 -0500
committerChris Dyer <cdyer@cs.cmu.edu>2010-12-13 16:18:34 -0500
commit66e5956906e61b047d2fd451f3053916cbc92433 (patch)
tree5bd4222506deae0c8e5f4c001bb6f7505b73f846 /klm/lm/model.hh
parent6d2a75d7deb35fcb2fac674190c19e0a0143aaed (diff)
new version of kenlm
Diffstat (limited to 'klm/lm/model.hh')
-rw-r--r--klm/lm/model.hh23
1 files changed, 21 insertions, 2 deletions
diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index e0eeee17..53e5773d 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -12,6 +12,8 @@
#include <algorithm>
#include <vector>
+#include <string.h>
+
namespace util { class FilePiece; }
namespace lm {
@@ -21,9 +23,10 @@ namespace ngram {
// Having this limit means that State can be
// (kMaxOrder - 1) * sizeof(float) bytes instead of
// sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
-const std::size_t kMaxOrder = 6;
+const unsigned char kMaxOrder = 6;
-// This is a POD.
+// This is a POD but if you want memcmp to return the same as operator==, call
+// ZeroRemaining first.
class State {
public:
bool operator==(const State &other) const {
@@ -37,6 +40,22 @@ class State {
return true;
}
+ // Three way comparison function.
+ int Compare(const State &other) const {
+ if (valid_length_ == other.valid_length_) {
+ return memcmp(history_, other.history_, valid_length_ * sizeof(WordIndex));
+ }
+ return (valid_length_ < other.valid_length_) ? -1 : 1;
+ }
+
+ // Call this before using raw memcmp.
+ void ZeroRemaining() {
+ for (unsigned char i = valid_length_; i < kMaxOrder - 1; ++i) {
+ history_[i] = 0;
+ backoff_[i] = 0.0;
+ }
+ }
+
// You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
// This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
WordIndex history_[kMaxOrder - 1];