summaryrefslogtreecommitdiff
path: root/klm/util
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2011-09-09 15:33:35 +0200
committerPatrick Simianer <p@simianer.de>2011-09-23 19:13:58 +0200
commitfe6acf199a5749f0a604a95e8d7af59bccc7505e (patch)
tree5aaad9282a65deb3b3b560bd3e643b845a42987a /klm/util
parentef74e67449515ff68f598f06ffc9d221eb13f919 (diff)
partial merge, ruleid feature
Diffstat (limited to 'klm/util')
-rw-r--r--klm/util/bit_packing.hh13
-rw-r--r--klm/util/exception.cc28
-rw-r--r--klm/util/exception.hh56
-rw-r--r--klm/util/file_piece.cc42
-rw-r--r--klm/util/file_piece.hh34
-rw-r--r--klm/util/murmur_hash.cc258
-rw-r--r--klm/util/probing_hash_table.hh2
-rw-r--r--klm/util/sorted_uniform.hh23
8 files changed, 281 insertions, 175 deletions
diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh
index b35d80c8..9f47d559 100644
--- a/klm/util/bit_packing.hh
+++ b/klm/util/bit_packing.hh
@@ -107,9 +107,20 @@ void BitPackingSanity();
uint8_t RequiredBits(uint64_t max_value);
struct BitsMask {
+ static BitsMask ByMax(uint64_t max_value) {
+ BitsMask ret;
+ ret.FromMax(max_value);
+ return ret;
+ }
+ static BitsMask ByBits(uint8_t bits) {
+ BitsMask ret;
+ ret.bits = bits;
+ ret.mask = (1ULL << bits) - 1;
+ return ret;
+ }
void FromMax(uint64_t max_value) {
bits = RequiredBits(max_value);
- mask = (1 << bits) - 1;
+ mask = (1ULL << bits) - 1;
}
uint8_t bits;
uint64_t mask;
diff --git a/klm/util/exception.cc b/klm/util/exception.cc
index 84f9fe7c..62280970 100644
--- a/klm/util/exception.cc
+++ b/klm/util/exception.cc
@@ -1,5 +1,9 @@
#include "util/exception.hh"
+#ifdef __GXX_RTTI
+#include <typeinfo>
+#endif
+
#include <errno.h>
#include <string.h>
@@ -22,6 +26,30 @@ const char *Exception::what() const throw() {
return text_.c_str();
}
+void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
+ /* The child class might have set some text, but we want this to come first.
+ * Another option would be passing this information to the constructor, but
+ * then child classes would have to accept constructor arguments and pass
+ * them down.
+ */
+ text_ = stream_.str();
+ stream_.str("");
+ stream_ << file << ':' << line;
+ if (func) stream_ << " in " << func << " threw ";
+ if (child_name) {
+ stream_ << child_name;
+ } else {
+#ifdef __GXX_RTTI
+ stream_ << typeid(this).name();
+#else
+ stream_ << "an exception";
+#endif
+ }
+ if (condition) stream_ << " because `" << condition;
+ stream_ << "'.\n";
+ stream_ << text_;
+}
+
namespace {
// The XOPEN version.
const char *HandleStrerror(int ret, const char *buf) {
diff --git a/klm/util/exception.hh b/klm/util/exception.hh
index c6936914..81675a57 100644
--- a/klm/util/exception.hh
+++ b/klm/util/exception.hh
@@ -1,8 +1,6 @@
#ifndef UTIL_EXCEPTION__
#define UTIL_EXCEPTION__
-#include "util/string_piece.hh"
-
#include <exception>
#include <sstream>
#include <string>
@@ -22,6 +20,14 @@ class Exception : public std::exception {
// Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe.
const char *what() const throw();
+ // For use by the UTIL_THROW macros.
+ void SetLocation(
+ const char *file,
+ unsigned int line,
+ const char *func,
+ const char *child_name,
+ const char *condition);
+
private:
template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
@@ -43,7 +49,49 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
return e;
}
-#define UTIL_THROW(Exception, Modify) { Exception UTIL_e; {UTIL_e << Modify;} throw UTIL_e; }
+#ifdef __GNUC__
+#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
+#else
+#ifdef _WIN32
+#define UTIL_FUNC_NAME __FUNCTION__
+#else
+#define UTIL_FUNC_NAME NULL
+#endif
+#endif
+
+#define UTIL_SET_LOCATION(UTIL_e, child, condition) do { \
+ (UTIL_e).SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, (child), (condition)); \
+} while (0)
+
+/* Create an instance of Exception, add the message Modify, and throw it.
+ * Modify is appended to the what() message and can contain << for ostream
+ * operations.
+ *
+ * do .. while kludge to swallow trailing ; character
+ * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
+ */
+#define UTIL_THROW(Exception, Modify) do { \
+ Exception UTIL_e; \
+ UTIL_SET_LOCATION(UTIL_e, #Exception, NULL); \
+ UTIL_e << Modify; \
+ throw UTIL_e; \
+} while (0)
+
+#define UTIL_THROW_VAR(Var, Modify) do { \
+ Exception &UTIL_e = (Var); \
+ UTIL_SET_LOCATION(UTIL_e, NULL, NULL); \
+ UTIL_e << Modify; \
+ throw UTIL_e; \
+} while (0)
+
+#define UTIL_THROW_IF(Condition, Exception, Modify) do { \
+ if (Condition) { \
+ Exception UTIL_e; \
+ UTIL_SET_LOCATION(UTIL_e, #Exception, #Condition); \
+ UTIL_e << Modify; \
+ throw UTIL_e; \
+ } \
+} while (0)
class ErrnoException : public Exception {
public:
@@ -51,7 +99,7 @@ class ErrnoException : public Exception {
virtual ~ErrnoException() throw();
- int Error() { return errno_; }
+ int Error() const throw() { return errno_; }
private:
int errno_;
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index f447a70c..cbe4234f 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -41,8 +41,8 @@ GZException::GZException(void *file) {
const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
int OpenReadOrThrow(const char *name) {
- int ret = open(name, O_RDONLY);
- if (ret == -1) UTIL_THROW(ErrnoException, "in open (" << name << ") for reading");
+ int ret;
+ UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name);
return ret;
}
@@ -52,13 +52,13 @@ off_t SizeFile(int fd) {
return sb.st_size;
}
-FilePiece::FilePiece(const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) :
+FilePiece::FilePiece(const char *name, std::ostream *show_progress, off_t min_buffer) :
file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(sysconf(_SC_PAGE_SIZE)),
progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) {
Initialize(name, show_progress, min_buffer);
}
-FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) :
+FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, off_t min_buffer) :
file_(fd), total_size_(SizeFile(file_.get())), page_(sysconf(_SC_PAGE_SIZE)),
progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) {
Initialize(name, show_progress, min_buffer);
@@ -78,7 +78,7 @@ FilePiece::~FilePiece() {
#endif
}
-StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileException) {
+StringPiece FilePiece::ReadLine(char delim) {
size_t skip = 0;
while (true) {
for (const char *i = position_ + skip; i < position_end_; ++i) {
@@ -97,20 +97,20 @@ StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileExcepti
}
}
-float FilePiece::ReadFloat() throw(GZException, EndOfFileException, ParseNumberException) {
+float FilePiece::ReadFloat() {
return ReadNumber<float>();
}
-double FilePiece::ReadDouble() throw(GZException, EndOfFileException, ParseNumberException) {
+double FilePiece::ReadDouble() {
return ReadNumber<double>();
}
-long int FilePiece::ReadLong() throw(GZException, EndOfFileException, ParseNumberException) {
+long int FilePiece::ReadLong() {
return ReadNumber<long int>();
}
-unsigned long int FilePiece::ReadULong() throw(GZException, EndOfFileException, ParseNumberException) {
+unsigned long int FilePiece::ReadULong() {
return ReadNumber<unsigned long int>();
}
-void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) {
+void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) {
#ifdef HAVE_ZLIB
gz_file_ = NULL;
#endif
@@ -163,7 +163,7 @@ void ParseNumber(const char *begin, char *&end, unsigned long int &out) {
}
} // namespace
-template <class T> T FilePiece::ReadNumber() throw(GZException, EndOfFileException, ParseNumberException) {
+template <class T> T FilePiece::ReadNumber() {
SkipSpaces();
while (last_space_ < position_) {
if (at_end_) {
@@ -186,7 +186,7 @@ template <class T> T FilePiece::ReadNumber() throw(GZException, EndOfFileExcepti
return ret;
}
-const char *FilePiece::FindDelimiterOrEOF(const bool *delim) throw (GZException, EndOfFileException) {
+const char *FilePiece::FindDelimiterOrEOF(const bool *delim) {
size_t skip = 0;
while (true) {
for (const char *i = position_ + skip; i < position_end_; ++i) {
@@ -201,7 +201,7 @@ const char *FilePiece::FindDelimiterOrEOF(const bool *delim) throw (GZException,
}
}
-void FilePiece::Shift() throw(GZException, EndOfFileException) {
+void FilePiece::Shift() {
if (at_end_) {
progress_.Finished();
throw EndOfFileException();
@@ -217,7 +217,7 @@ void FilePiece::Shift() throw(GZException, EndOfFileException) {
}
}
-void FilePiece::MMapShift(off_t desired_begin) throw() {
+void FilePiece::MMapShift(off_t desired_begin) {
// Use mmap.
off_t ignore = desired_begin % page_;
// Duplicate request for Shift means give more data.
@@ -259,25 +259,23 @@ void FilePiece::MMapShift(off_t desired_begin) throw() {
progress_.Set(desired_begin);
}
-void FilePiece::TransitionToRead() throw (GZException) {
+void FilePiece::TransitionToRead() {
assert(!fallback_to_read_);
fallback_to_read_ = true;
data_.reset();
data_.reset(malloc(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
- if (!data_.get()) UTIL_THROW(ErrnoException, "malloc failed for " << default_map_size_);
+ UTIL_THROW_IF(!data_.get(), ErrnoException, "malloc failed for " << default_map_size_);
position_ = data_.begin();
position_end_ = position_;
#ifdef HAVE_ZLIB
assert(!gz_file_);
gz_file_ = gzdopen(file_.get(), "r");
- if (!gz_file_) {
- UTIL_THROW(GZException, "zlib failed to open " << file_name_);
- }
+ UTIL_THROW_IF(!gz_file_, GZException, "zlib failed to open " << file_name_);
#endif
}
-void FilePiece::ReadShift() throw(GZException, EndOfFileException) {
+void FilePiece::ReadShift() {
assert(fallback_to_read_);
// Bytes [data_.begin(), position_) have been consumed.
// Bytes [position_, position_end_) have been read into the buffer.
@@ -297,7 +295,7 @@ void FilePiece::ReadShift() throw(GZException, EndOfFileException) {
std::size_t valid_length = position_end_ - position_;
default_map_size_ *= 2;
data_.call_realloc(default_map_size_);
- if (!data_.get()) UTIL_THROW(ErrnoException, "realloc failed for " << default_map_size_);
+ UTIL_THROW_IF(!data_.get(), ErrnoException, "realloc failed for " << default_map_size_);
position_ = data_.begin();
position_end_ = position_ + valid_length;
} else {
@@ -320,7 +318,7 @@ void FilePiece::ReadShift() throw(GZException, EndOfFileException) {
}
#else
read_return = read(file_.get(), static_cast<char*>(data_.get()) + already_read, default_map_size_ - already_read);
- if (read_return == -1) UTIL_THROW(ErrnoException, "read failed");
+ UTIL_THROW_IF(read_return == -1, ErrnoException, "read failed");
progress_.Set(mapped_offset_);
#endif
if (read_return == 0) {
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index 870ae5a3..a5c00910 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -45,13 +45,13 @@ off_t SizeFile(int fd);
class FilePiece {
public:
// 32 MB default.
- explicit FilePiece(const char *file, std::ostream *show_progress = NULL, off_t min_buffer = 33554432) throw(GZException);
+ explicit FilePiece(const char *file, std::ostream *show_progress = NULL, off_t min_buffer = 33554432);
// Takes ownership of fd. name is used for messages.
- explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, off_t min_buffer = 33554432) throw(GZException);
+ explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, off_t min_buffer = 33554432);
~FilePiece();
- char get() throw(GZException, EndOfFileException) {
+ char get() {
if (position_ == position_end_) {
Shift();
if (at_end_) throw EndOfFileException();
@@ -60,22 +60,22 @@ class FilePiece {
}
// Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace().
- StringPiece ReadDelimited(const bool *delim = kSpaces) throw(GZException, EndOfFileException) {
+ StringPiece ReadDelimited(const bool *delim = kSpaces) {
SkipSpaces(delim);
return Consume(FindDelimiterOrEOF(delim));
}
// Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
// It is similar to getline in that way.
- StringPiece ReadLine(char delim = '\n') throw(GZException, EndOfFileException);
+ StringPiece ReadLine(char delim = '\n');
- float ReadFloat() throw(GZException, EndOfFileException, ParseNumberException);
- double ReadDouble() throw(GZException, EndOfFileException, ParseNumberException);
- long int ReadLong() throw(GZException, EndOfFileException, ParseNumberException);
- unsigned long int ReadULong() throw(GZException, EndOfFileException, ParseNumberException);
+ float ReadFloat();
+ double ReadDouble();
+ long int ReadLong();
+ unsigned long int ReadULong();
// Skip spaces defined by isspace.
- void SkipSpaces(const bool *delim = kSpaces) throw (GZException, EndOfFileException) {
+ void SkipSpaces(const bool *delim = kSpaces) {
for (; ; ++position_) {
if (position_ == position_end_) Shift();
if (!delim[static_cast<unsigned char>(*position_)]) return;
@@ -89,9 +89,9 @@ class FilePiece {
const std::string &FileName() const { return file_name_; }
private:
- void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) throw(GZException);
+ void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer);
- template <class T> T ReadNumber() throw(GZException, EndOfFileException, ParseNumberException);
+ template <class T> T ReadNumber();
StringPiece Consume(const char *to) {
StringPiece ret(position_, to - position_);
@@ -99,14 +99,14 @@ class FilePiece {
return ret;
}
- const char *FindDelimiterOrEOF(const bool *delim = kSpaces) throw (GZException, EndOfFileException);
+ const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
- void Shift() throw (EndOfFileException, GZException);
+ void Shift();
// Backends to Shift().
- void MMapShift(off_t desired_begin) throw ();
+ void MMapShift(off_t desired_begin);
- void TransitionToRead() throw (GZException);
- void ReadShift() throw (GZException, EndOfFileException);
+ void TransitionToRead();
+ void ReadShift();
const char *position_, *last_space_, *position_end_;
diff --git a/klm/util/murmur_hash.cc b/klm/util/murmur_hash.cc
index d58a0727..fec47fd9 100644
--- a/klm/util/murmur_hash.cc
+++ b/klm/util/murmur_hash.cc
@@ -1,129 +1,129 @@
-/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
- * code is released to the public domain. For business purposes, Murmurhash is
- * under the MIT license."
- * This is modified from the original:
- * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
- * length changed to unsigned int.
- * placed in namespace util
- * add MurmurHashNative
- * default option = 0 for seed
- */
-
-#include "util/murmur_hash.hh"
-
-namespace util {
-
-//-----------------------------------------------------------------------------
-// MurmurHash2, 64-bit versions, by Austin Appleby
-
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
-// and endian-ness issues if used across multiple platforms.
-
-// 64-bit hash for 64-bit platforms
-
-uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
-{
- const uint64_t m = 0xc6a4a7935bd1e995ULL;
- const int r = 47;
-
- uint64_t h = seed ^ (len * m);
-
- const uint64_t * data = (const uint64_t *)key;
- const uint64_t * end = data + (len/8);
-
- while(data != end)
- {
- uint64_t k = *data++;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- const unsigned char * data2 = (const unsigned char*)data;
-
- switch(len & 7)
- {
- case 7: h ^= uint64_t(data2[6]) << 48;
- case 6: h ^= uint64_t(data2[5]) << 40;
- case 5: h ^= uint64_t(data2[4]) << 32;
- case 4: h ^= uint64_t(data2[3]) << 24;
- case 3: h ^= uint64_t(data2[2]) << 16;
- case 2: h ^= uint64_t(data2[1]) << 8;
- case 1: h ^= uint64_t(data2[0]);
- h *= m;
- };
-
- h ^= h >> r;
- h *= m;
- h ^= h >> r;
-
- return h;
-}
-
-
-// 64-bit hash for 32-bit platforms
-
-uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
-{
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
-
- unsigned int h1 = seed ^ len;
- unsigned int h2 = 0;
-
- const unsigned int * data = (const unsigned int *)key;
-
- while(len >= 8)
- {
- unsigned int k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
-
- unsigned int k2 = *data++;
- k2 *= m; k2 ^= k2 >> r; k2 *= m;
- h2 *= m; h2 ^= k2;
- len -= 4;
- }
-
- if(len >= 4)
- {
- unsigned int k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h2 ^= ((unsigned char*)data)[2] << 16;
- case 2: h2 ^= ((unsigned char*)data)[1] << 8;
- case 1: h2 ^= ((unsigned char*)data)[0];
- h2 *= m;
- };
-
- h1 ^= h2 >> 18; h1 *= m;
- h2 ^= h1 >> 22; h2 *= m;
- h1 ^= h2 >> 17; h1 *= m;
- h2 ^= h1 >> 19; h2 *= m;
-
- uint64_t h = h1;
-
- h = (h << 32) | h2;
-
- return h;
-}
-
-uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) {
- if (sizeof(int) == 4) {
- return MurmurHash64B(key, len, seed);
- } else {
- return MurmurHash64A(key, len, seed);
- }
-}
-
-} // namespace util
+/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
+ * code is released to the public domain. For business purposes, Murmurhash is
+ * under the MIT license."
+ * This is modified from the original:
+ * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
+ * length changed to unsigned int.
+ * placed in namespace util
+ * add MurmurHashNative
+ * default option = 0 for seed
+ */
+
+#include "util/murmur_hash.hh"
+
+namespace util {
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+
+uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
+{
+ const uint64_t m = 0xc6a4a7935bd1e995ULL;
+ const int r = 47;
+
+ uint64_t h = seed ^ (len * m);
+
+ const uint64_t * data = (const uint64_t *)key;
+ const uint64_t * end = data + (len/8);
+
+ while(data != end)
+ {
+ uint64_t k = *data++;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ const unsigned char * data2 = (const unsigned char*)data;
+
+ switch(len & 7)
+ {
+ case 7: h ^= uint64_t(data2[6]) << 48;
+ case 6: h ^= uint64_t(data2[5]) << 40;
+ case 5: h ^= uint64_t(data2[4]) << 32;
+ case 4: h ^= uint64_t(data2[3]) << 24;
+ case 3: h ^= uint64_t(data2[2]) << 16;
+ case 2: h ^= uint64_t(data2[1]) << 8;
+ case 1: h ^= uint64_t(data2[0]);
+ h *= m;
+ };
+
+ h ^= h >> r;
+ h *= m;
+ h ^= h >> r;
+
+ return h;
+}
+
+
+// 64-bit hash for 32-bit platforms
+
+uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
+{
+ const unsigned int m = 0x5bd1e995;
+ const int r = 24;
+
+ unsigned int h1 = seed ^ len;
+ unsigned int h2 = 0;
+
+ const unsigned int * data = (const unsigned int *)key;
+
+ while(len >= 8)
+ {
+ unsigned int k1 = *data++;
+ k1 *= m; k1 ^= k1 >> r; k1 *= m;
+ h1 *= m; h1 ^= k1;
+ len -= 4;
+
+ unsigned int k2 = *data++;
+ k2 *= m; k2 ^= k2 >> r; k2 *= m;
+ h2 *= m; h2 ^= k2;
+ len -= 4;
+ }
+
+ if(len >= 4)
+ {
+ unsigned int k1 = *data++;
+ k1 *= m; k1 ^= k1 >> r; k1 *= m;
+ h1 *= m; h1 ^= k1;
+ len -= 4;
+ }
+
+ switch(len)
+ {
+ case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+ case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+ case 1: h2 ^= ((unsigned char*)data)[0];
+ h2 *= m;
+ };
+
+ h1 ^= h2 >> 18; h1 *= m;
+ h2 ^= h1 >> 22; h2 *= m;
+ h1 ^= h2 >> 17; h1 *= m;
+ h2 ^= h1 >> 19; h2 *= m;
+
+ uint64_t h = h1;
+
+ h = (h << 32) | h2;
+
+ return h;
+}
+
+uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) {
+ if (sizeof(int) == 4) {
+ return MurmurHash64B(key, len, seed);
+ } else {
+ return MurmurHash64A(key, len, seed);
+ }
+}
+
+} // namespace util
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 00be0ed7..2ec342a6 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -57,7 +57,7 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
equal_(equal_func),
entries_(0)
#ifdef DEBUG
- , initialized_(true),
+ , initialized_(true)
#endif
{}
diff --git a/klm/util/sorted_uniform.hh b/klm/util/sorted_uniform.hh
index 84d7aa02..0d6ecbbd 100644
--- a/klm/util/sorted_uniform.hh
+++ b/klm/util/sorted_uniform.hh
@@ -12,7 +12,7 @@ namespace util {
template <class T> class IdentityAccessor {
public:
typedef T Key;
- T operator()(const uint64_t *in) const { return *in; }
+ T operator()(const T *in) const { return *in; }
};
struct Pivot64 {
@@ -101,6 +101,27 @@ template <class Iterator, class Accessor, class Pivot> bool SortedUniformFind(co
return BoundedSortedUniformFind<Iterator, Accessor, Pivot>(accessor, begin, below, end, above, key, out);
}
+// May return begin - 1.
+template <class Iterator, class Accessor> Iterator BinaryBelow(
+ const Accessor &accessor,
+ Iterator begin,
+ Iterator end,
+ const typename Accessor::Key key) {
+ while (end > begin) {
+ Iterator pivot(begin + (end - begin) / 2);
+ typename Accessor::Key mid(accessor(pivot));
+ if (mid < key) {
+ begin = pivot + 1;
+ } else if (mid > key) {
+ end = pivot;
+ } else {
+ for (++pivot; (pivot < end) && accessor(pivot) == mid; ++pivot) {}
+ return pivot - 1;
+ }
+ }
+ return begin - 1;
+}
+
// To use this template, you need to define a Pivot function to match Key.
template <class PackingT> class SortedUniformMap {
public: