diff options
author | Patrick Simianer <p@simianer.de> | 2011-09-09 15:33:35 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-09-23 19:13:58 +0200 |
commit | fe6acf199a5749f0a604a95e8d7af59bccc7505e (patch) | |
tree | 5aaad9282a65deb3b3b560bd3e643b845a42987a /klm/util | |
parent | ef74e67449515ff68f598f06ffc9d221eb13f919 (diff) |
partial merge, ruleid feature
Diffstat (limited to 'klm/util')
-rw-r--r-- | klm/util/bit_packing.hh | 13 | ||||
-rw-r--r-- | klm/util/exception.cc | 28 | ||||
-rw-r--r-- | klm/util/exception.hh | 56 | ||||
-rw-r--r-- | klm/util/file_piece.cc | 42 | ||||
-rw-r--r-- | klm/util/file_piece.hh | 34 | ||||
-rw-r--r-- | klm/util/murmur_hash.cc | 258 | ||||
-rw-r--r-- | klm/util/probing_hash_table.hh | 2 | ||||
-rw-r--r-- | klm/util/sorted_uniform.hh | 23 |
8 files changed, 281 insertions, 175 deletions
diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh index b35d80c8..9f47d559 100644 --- a/klm/util/bit_packing.hh +++ b/klm/util/bit_packing.hh @@ -107,9 +107,20 @@ void BitPackingSanity(); uint8_t RequiredBits(uint64_t max_value); struct BitsMask { + static BitsMask ByMax(uint64_t max_value) { + BitsMask ret; + ret.FromMax(max_value); + return ret; + } + static BitsMask ByBits(uint8_t bits) { + BitsMask ret; + ret.bits = bits; + ret.mask = (1ULL << bits) - 1; + return ret; + } void FromMax(uint64_t max_value) { bits = RequiredBits(max_value); - mask = (1 << bits) - 1; + mask = (1ULL << bits) - 1; } uint8_t bits; uint64_t mask; diff --git a/klm/util/exception.cc b/klm/util/exception.cc index 84f9fe7c..62280970 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -1,5 +1,9 @@ #include "util/exception.hh" +#ifdef __GXX_RTTI +#include <typeinfo> +#endif + #include <errno.h> #include <string.h> @@ -22,6 +26,30 @@ const char *Exception::what() const throw() { return text_.c_str(); } +void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) { + /* The child class might have set some text, but we want this to come first. + * Another option would be passing this information to the constructor, but + * then child classes would have to accept constructor arguments and pass + * them down. + */ + text_ = stream_.str(); + stream_.str(""); + stream_ << file << ':' << line; + if (func) stream_ << " in " << func << " threw "; + if (child_name) { + stream_ << child_name; + } else { +#ifdef __GXX_RTTI + stream_ << typeid(this).name(); +#else + stream_ << "an exception"; +#endif + } + if (condition) stream_ << " because `" << condition; + stream_ << "'.\n"; + stream_ << text_; +} + namespace { // The XOPEN version. const char *HandleStrerror(int ret, const char *buf) { diff --git a/klm/util/exception.hh b/klm/util/exception.hh index c6936914..81675a57 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -1,8 +1,6 @@ #ifndef UTIL_EXCEPTION__ #define UTIL_EXCEPTION__ -#include "util/string_piece.hh" - #include <exception> #include <sstream> #include <string> @@ -22,6 +20,14 @@ class Exception : public std::exception { // Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe. const char *what() const throw(); + // For use by the UTIL_THROW macros. + void SetLocation( + const char *file, + unsigned int line, + const char *func, + const char *child_name, + const char *condition); + private: template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data); @@ -43,7 +49,49 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep return e; } -#define UTIL_THROW(Exception, Modify) { Exception UTIL_e; {UTIL_e << Modify;} throw UTIL_e; } +#ifdef __GNUC__ +#define UTIL_FUNC_NAME __PRETTY_FUNCTION__ +#else +#ifdef _WIN32 +#define UTIL_FUNC_NAME __FUNCTION__ +#else +#define UTIL_FUNC_NAME NULL +#endif +#endif + +#define UTIL_SET_LOCATION(UTIL_e, child, condition) do { \ + (UTIL_e).SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, (child), (condition)); \ +} while (0) + +/* Create an instance of Exception, add the message Modify, and throw it. + * Modify is appended to the what() message and can contain << for ostream + * operations. + * + * do .. while kludge to swallow trailing ; character + * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html . + */ +#define UTIL_THROW(Exception, Modify) do { \ + Exception UTIL_e; \ + UTIL_SET_LOCATION(UTIL_e, #Exception, NULL); \ + UTIL_e << Modify; \ + throw UTIL_e; \ +} while (0) + +#define UTIL_THROW_VAR(Var, Modify) do { \ + Exception &UTIL_e = (Var); \ + UTIL_SET_LOCATION(UTIL_e, NULL, NULL); \ + UTIL_e << Modify; \ + throw UTIL_e; \ +} while (0) + +#define UTIL_THROW_IF(Condition, Exception, Modify) do { \ + if (Condition) { \ + Exception UTIL_e; \ + UTIL_SET_LOCATION(UTIL_e, #Exception, #Condition); \ + UTIL_e << Modify; \ + throw UTIL_e; \ + } \ +} while (0) class ErrnoException : public Exception { public: @@ -51,7 +99,7 @@ class ErrnoException : public Exception { virtual ~ErrnoException() throw(); - int Error() { return errno_; } + int Error() const throw() { return errno_; } private: int errno_; diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index f447a70c..cbe4234f 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -41,8 +41,8 @@ GZException::GZException(void *file) { const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; int OpenReadOrThrow(const char *name) { - int ret = open(name, O_RDONLY); - if (ret == -1) UTIL_THROW(ErrnoException, "in open (" << name << ") for reading"); + int ret; + UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name); return ret; } @@ -52,13 +52,13 @@ off_t SizeFile(int fd) { return sb.st_size; } -FilePiece::FilePiece(const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) : +FilePiece::FilePiece(const char *name, std::ostream *show_progress, off_t min_buffer) : file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(sysconf(_SC_PAGE_SIZE)), progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { Initialize(name, show_progress, min_buffer); } -FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) : +FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, off_t min_buffer) : file_(fd), total_size_(SizeFile(file_.get())), page_(sysconf(_SC_PAGE_SIZE)), progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { Initialize(name, show_progress, min_buffer); @@ -78,7 +78,7 @@ FilePiece::~FilePiece() { #endif } -StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileException) { +StringPiece FilePiece::ReadLine(char delim) { size_t skip = 0; while (true) { for (const char *i = position_ + skip; i < position_end_; ++i) { @@ -97,20 +97,20 @@ StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileExcepti } } -float FilePiece::ReadFloat() throw(GZException, EndOfFileException, ParseNumberException) { +float FilePiece::ReadFloat() { return ReadNumber<float>(); } -double FilePiece::ReadDouble() throw(GZException, EndOfFileException, ParseNumberException) { +double FilePiece::ReadDouble() { return ReadNumber<double>(); } -long int FilePiece::ReadLong() throw(GZException, EndOfFileException, ParseNumberException) { +long int FilePiece::ReadLong() { return ReadNumber<long int>(); } -unsigned long int FilePiece::ReadULong() throw(GZException, EndOfFileException, ParseNumberException) { +unsigned long int FilePiece::ReadULong() { return ReadNumber<unsigned long int>(); } -void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) { +void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) { #ifdef HAVE_ZLIB gz_file_ = NULL; #endif @@ -163,7 +163,7 @@ void ParseNumber(const char *begin, char *&end, unsigned long int &out) { } } // namespace -template <class T> T FilePiece::ReadNumber() throw(GZException, EndOfFileException, ParseNumberException) { +template <class T> T FilePiece::ReadNumber() { SkipSpaces(); while (last_space_ < position_) { if (at_end_) { @@ -186,7 +186,7 @@ template <class T> T FilePiece::ReadNumber() throw(GZException, EndOfFileExcepti return ret; } -const char *FilePiece::FindDelimiterOrEOF(const bool *delim) throw (GZException, EndOfFileException) { +const char *FilePiece::FindDelimiterOrEOF(const bool *delim) { size_t skip = 0; while (true) { for (const char *i = position_ + skip; i < position_end_; ++i) { @@ -201,7 +201,7 @@ const char *FilePiece::FindDelimiterOrEOF(const bool *delim) throw (GZException, } } -void FilePiece::Shift() throw(GZException, EndOfFileException) { +void FilePiece::Shift() { if (at_end_) { progress_.Finished(); throw EndOfFileException(); @@ -217,7 +217,7 @@ void FilePiece::Shift() throw(GZException, EndOfFileException) { } } -void FilePiece::MMapShift(off_t desired_begin) throw() { +void FilePiece::MMapShift(off_t desired_begin) { // Use mmap. off_t ignore = desired_begin % page_; // Duplicate request for Shift means give more data. @@ -259,25 +259,23 @@ void FilePiece::MMapShift(off_t desired_begin) throw() { progress_.Set(desired_begin); } -void FilePiece::TransitionToRead() throw (GZException) { +void FilePiece::TransitionToRead() { assert(!fallback_to_read_); fallback_to_read_ = true; data_.reset(); data_.reset(malloc(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); - if (!data_.get()) UTIL_THROW(ErrnoException, "malloc failed for " << default_map_size_); + UTIL_THROW_IF(!data_.get(), ErrnoException, "malloc failed for " << default_map_size_); position_ = data_.begin(); position_end_ = position_; #ifdef HAVE_ZLIB assert(!gz_file_); gz_file_ = gzdopen(file_.get(), "r"); - if (!gz_file_) { - UTIL_THROW(GZException, "zlib failed to open " << file_name_); - } + UTIL_THROW_IF(!gz_file_, GZException, "zlib failed to open " << file_name_); #endif } -void FilePiece::ReadShift() throw(GZException, EndOfFileException) { +void FilePiece::ReadShift() { assert(fallback_to_read_); // Bytes [data_.begin(), position_) have been consumed. // Bytes [position_, position_end_) have been read into the buffer. @@ -297,7 +295,7 @@ void FilePiece::ReadShift() throw(GZException, EndOfFileException) { std::size_t valid_length = position_end_ - position_; default_map_size_ *= 2; data_.call_realloc(default_map_size_); - if (!data_.get()) UTIL_THROW(ErrnoException, "realloc failed for " << default_map_size_); + UTIL_THROW_IF(!data_.get(), ErrnoException, "realloc failed for " << default_map_size_); position_ = data_.begin(); position_end_ = position_ + valid_length; } else { @@ -320,7 +318,7 @@ void FilePiece::ReadShift() throw(GZException, EndOfFileException) { } #else read_return = read(file_.get(), static_cast<char*>(data_.get()) + already_read, default_map_size_ - already_read); - if (read_return == -1) UTIL_THROW(ErrnoException, "read failed"); + UTIL_THROW_IF(read_return == -1, ErrnoException, "read failed"); progress_.Set(mapped_offset_); #endif if (read_return == 0) { diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index 870ae5a3..a5c00910 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -45,13 +45,13 @@ off_t SizeFile(int fd); class FilePiece { public: // 32 MB default. - explicit FilePiece(const char *file, std::ostream *show_progress = NULL, off_t min_buffer = 33554432) throw(GZException); + explicit FilePiece(const char *file, std::ostream *show_progress = NULL, off_t min_buffer = 33554432); // Takes ownership of fd. name is used for messages. - explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, off_t min_buffer = 33554432) throw(GZException); + explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, off_t min_buffer = 33554432); ~FilePiece(); - char get() throw(GZException, EndOfFileException) { + char get() { if (position_ == position_end_) { Shift(); if (at_end_) throw EndOfFileException(); @@ -60,22 +60,22 @@ class FilePiece { } // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace(). - StringPiece ReadDelimited(const bool *delim = kSpaces) throw(GZException, EndOfFileException) { + StringPiece ReadDelimited(const bool *delim = kSpaces) { SkipSpaces(delim); return Consume(FindDelimiterOrEOF(delim)); } // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter. // It is similar to getline in that way. - StringPiece ReadLine(char delim = '\n') throw(GZException, EndOfFileException); + StringPiece ReadLine(char delim = '\n'); - float ReadFloat() throw(GZException, EndOfFileException, ParseNumberException); - double ReadDouble() throw(GZException, EndOfFileException, ParseNumberException); - long int ReadLong() throw(GZException, EndOfFileException, ParseNumberException); - unsigned long int ReadULong() throw(GZException, EndOfFileException, ParseNumberException); + float ReadFloat(); + double ReadDouble(); + long int ReadLong(); + unsigned long int ReadULong(); // Skip spaces defined by isspace. - void SkipSpaces(const bool *delim = kSpaces) throw (GZException, EndOfFileException) { + void SkipSpaces(const bool *delim = kSpaces) { for (; ; ++position_) { if (position_ == position_end_) Shift(); if (!delim[static_cast<unsigned char>(*position_)]) return; @@ -89,9 +89,9 @@ class FilePiece { const std::string &FileName() const { return file_name_; } private: - void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) throw(GZException); + void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer); - template <class T> T ReadNumber() throw(GZException, EndOfFileException, ParseNumberException); + template <class T> T ReadNumber(); StringPiece Consume(const char *to) { StringPiece ret(position_, to - position_); @@ -99,14 +99,14 @@ class FilePiece { return ret; } - const char *FindDelimiterOrEOF(const bool *delim = kSpaces) throw (GZException, EndOfFileException); + const char *FindDelimiterOrEOF(const bool *delim = kSpaces); - void Shift() throw (EndOfFileException, GZException); + void Shift(); // Backends to Shift(). - void MMapShift(off_t desired_begin) throw (); + void MMapShift(off_t desired_begin); - void TransitionToRead() throw (GZException); - void ReadShift() throw (GZException, EndOfFileException); + void TransitionToRead(); + void ReadShift(); const char *position_, *last_space_, *position_end_; diff --git a/klm/util/murmur_hash.cc b/klm/util/murmur_hash.cc index d58a0727..fec47fd9 100644 --- a/klm/util/murmur_hash.cc +++ b/klm/util/murmur_hash.cc @@ -1,129 +1,129 @@ -/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
- * code is released to the public domain. For business purposes, Murmurhash is
- * under the MIT license."
- * This is modified from the original:
- * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
- * length changed to unsigned int.
- * placed in namespace util
- * add MurmurHashNative
- * default option = 0 for seed
- */
-
-#include "util/murmur_hash.hh"
-
-namespace util {
-
-//-----------------------------------------------------------------------------
-// MurmurHash2, 64-bit versions, by Austin Appleby
-
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
-// and endian-ness issues if used across multiple platforms.
-
-// 64-bit hash for 64-bit platforms
-
-uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
-{
- const uint64_t m = 0xc6a4a7935bd1e995ULL;
- const int r = 47;
-
- uint64_t h = seed ^ (len * m);
-
- const uint64_t * data = (const uint64_t *)key;
- const uint64_t * end = data + (len/8);
-
- while(data != end)
- {
- uint64_t k = *data++;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- const unsigned char * data2 = (const unsigned char*)data;
-
- switch(len & 7)
- {
- case 7: h ^= uint64_t(data2[6]) << 48;
- case 6: h ^= uint64_t(data2[5]) << 40;
- case 5: h ^= uint64_t(data2[4]) << 32;
- case 4: h ^= uint64_t(data2[3]) << 24;
- case 3: h ^= uint64_t(data2[2]) << 16;
- case 2: h ^= uint64_t(data2[1]) << 8;
- case 1: h ^= uint64_t(data2[0]);
- h *= m;
- };
-
- h ^= h >> r;
- h *= m;
- h ^= h >> r;
-
- return h;
-}
-
-
-// 64-bit hash for 32-bit platforms
-
-uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
-{
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
-
- unsigned int h1 = seed ^ len;
- unsigned int h2 = 0;
-
- const unsigned int * data = (const unsigned int *)key;
-
- while(len >= 8)
- {
- unsigned int k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
-
- unsigned int k2 = *data++;
- k2 *= m; k2 ^= k2 >> r; k2 *= m;
- h2 *= m; h2 ^= k2;
- len -= 4;
- }
-
- if(len >= 4)
- {
- unsigned int k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h2 ^= ((unsigned char*)data)[2] << 16;
- case 2: h2 ^= ((unsigned char*)data)[1] << 8;
- case 1: h2 ^= ((unsigned char*)data)[0];
- h2 *= m;
- };
-
- h1 ^= h2 >> 18; h1 *= m;
- h2 ^= h1 >> 22; h2 *= m;
- h1 ^= h2 >> 17; h1 *= m;
- h2 ^= h1 >> 19; h2 *= m;
-
- uint64_t h = h1;
-
- h = (h << 32) | h2;
-
- return h;
-}
-
-uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) {
- if (sizeof(int) == 4) {
- return MurmurHash64B(key, len, seed);
- } else {
- return MurmurHash64A(key, len, seed);
- }
-}
-
-} // namespace util
+/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All + * code is released to the public domain. For business purposes, Murmurhash is + * under the MIT license." + * This is modified from the original: + * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit. + * length changed to unsigned int. + * placed in namespace util + * add MurmurHashNative + * default option = 0 for seed + */ + +#include "util/murmur_hash.hh" + +namespace util { + +//----------------------------------------------------------------------------- +// MurmurHash2, 64-bit versions, by Austin Appleby + +// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment +// and endian-ness issues if used across multiple platforms. + +// 64-bit hash for 64-bit platforms + +uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed ) +{ + const uint64_t m = 0xc6a4a7935bd1e995ULL; + const int r = 47; + + uint64_t h = seed ^ (len * m); + + const uint64_t * data = (const uint64_t *)key; + const uint64_t * end = data + (len/8); + + while(data != end) + { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + const unsigned char * data2 = (const unsigned char*)data; + + switch(len & 7) + { + case 7: h ^= uint64_t(data2[6]) << 48; + case 6: h ^= uint64_t(data2[5]) << 40; + case 5: h ^= uint64_t(data2[4]) << 32; + case 4: h ^= uint64_t(data2[3]) << 24; + case 3: h ^= uint64_t(data2[2]) << 16; + case 2: h ^= uint64_t(data2[1]) << 8; + case 1: h ^= uint64_t(data2[0]); + h *= m; + }; + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return h; +} + + +// 64-bit hash for 32-bit platforms + +uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed ) +{ + const unsigned int m = 0x5bd1e995; + const int r = 24; + + unsigned int h1 = seed ^ len; + unsigned int h2 = 0; + + const unsigned int * data = (const unsigned int *)key; + + while(len >= 8) + { + unsigned int k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + + unsigned int k2 = *data++; + k2 *= m; k2 ^= k2 >> r; k2 *= m; + h2 *= m; h2 ^= k2; + len -= 4; + } + + if(len >= 4) + { + unsigned int k1 = *data++; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; + } + + switch(len) + { + case 3: h2 ^= ((unsigned char*)data)[2] << 16; + case 2: h2 ^= ((unsigned char*)data)[1] << 8; + case 1: h2 ^= ((unsigned char*)data)[0]; + h2 *= m; + }; + + h1 ^= h2 >> 18; h1 *= m; + h2 ^= h1 >> 22; h2 *= m; + h1 ^= h2 >> 17; h1 *= m; + h2 ^= h1 >> 19; h2 *= m; + + uint64_t h = h1; + + h = (h << 32) | h2; + + return h; +} + +uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) { + if (sizeof(int) == 4) { + return MurmurHash64B(key, len, seed); + } else { + return MurmurHash64A(key, len, seed); + } +} + +} // namespace util diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 00be0ed7..2ec342a6 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -57,7 +57,7 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac equal_(equal_func), entries_(0) #ifdef DEBUG - , initialized_(true), + , initialized_(true) #endif {} diff --git a/klm/util/sorted_uniform.hh b/klm/util/sorted_uniform.hh index 84d7aa02..0d6ecbbd 100644 --- a/klm/util/sorted_uniform.hh +++ b/klm/util/sorted_uniform.hh @@ -12,7 +12,7 @@ namespace util { template <class T> class IdentityAccessor { public: typedef T Key; - T operator()(const uint64_t *in) const { return *in; } + T operator()(const T *in) const { return *in; } }; struct Pivot64 { @@ -101,6 +101,27 @@ template <class Iterator, class Accessor, class Pivot> bool SortedUniformFind(co return BoundedSortedUniformFind<Iterator, Accessor, Pivot>(accessor, begin, below, end, above, key, out); } +// May return begin - 1. +template <class Iterator, class Accessor> Iterator BinaryBelow( + const Accessor &accessor, + Iterator begin, + Iterator end, + const typename Accessor::Key key) { + while (end > begin) { + Iterator pivot(begin + (end - begin) / 2); + typename Accessor::Key mid(accessor(pivot)); + if (mid < key) { + begin = pivot + 1; + } else if (mid > key) { + end = pivot; + } else { + for (++pivot; (pivot < end) && accessor(pivot) == mid; ++pivot) {} + return pivot - 1; + } + } + return begin - 1; +} + // To use this template, you need to define a Pivot function to match Key. template <class PackingT> class SortedUniformMap { public: |