From 66e5956906e61b047d2fd451f3053916cbc92433 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Mon, 13 Dec 2010 16:18:34 -0500
Subject: new version of kenlm

---
 klm/lm/model.hh | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'klm/lm/model.hh')

diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index e0eeee17..53e5773d 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -12,6 +12,8 @@
 #include
 #include
 
+#include
+
 namespace util { class FilePiece; }
 
 namespace lm {
@@ -21,9 +23,10 @@ namespace ngram {
 // Having this limit means that State can be
 // (kMaxOrder - 1) * sizeof(float) bytes instead of
 // sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
-const std::size_t kMaxOrder = 6;
+const unsigned char kMaxOrder = 6;
 
-// This is a POD.
+// This is a POD but if you want memcmp to return the same as operator==, call
+// ZeroRemaining first.
 class State {
   public:
     bool operator==(const State &other) const {
@@ -37,6 +40,22 @@ class State {
       return true;
     }
 
+    // Three way comparison function.
+    int Compare(const State &other) const {
+      if (valid_length_ == other.valid_length_) {
+        return memcmp(history_, other.history_, valid_length_ * sizeof(WordIndex));
+      }
+      return (valid_length_ < other.valid_length_) ? -1 : 1;
+    }
+
+    // Call this before using raw memcmp.
+    void ZeroRemaining() {
+      for (unsigned char i = valid_length_; i < kMaxOrder - 1; ++i) {
+        history_[i] = 0;
+        backoff_[i] = 0.0;
+      }
+    }
+
     // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
     // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
     WordIndex history_[kMaxOrder - 1];
-- 
cgit v1.2.3