From eecc4cbf5a6e28f910130cd4a58444e9d4701ea9 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 27 Jan 2014 17:42:19 -0800 Subject: KenLM 5cc905bc2d214efa7de2db56a9a672b749a95591 --- klm/util/probing_hash_table.hh | 84 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 5 deletions(-) (limited to 'klm/util/probing_hash_table.hh') diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 51a2944d..38524806 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -2,6 +2,7 @@ #define UTIL_PROBING_HASH_TABLE__ #include "util/exception.hh" +#include "util/scoped.hh" #include #include @@ -25,6 +26,8 @@ struct IdentityHash { template T operator()(T arg) const { return arg; } }; +template class AutoProbing; + /* Non-standard hash table * Buckets must be set at the beginning and must be greater than maximum number * of elements, else it throws ProbingSizeException. @@ -33,7 +36,6 @@ struct IdentityHash { * Uses linear probing to find value. * Only insert and lookup operations. */ - template > class ProbingHashTable { public: typedef EntryT Entry; @@ -43,7 +45,6 @@ template (multiplier * static_cast(entries))); return buckets * sizeof(Entry); @@ -69,6 +70,11 @@ template (new_base); + end_ = begin_ + buckets_; + } + template MutableIterator Insert(const T &t) { #ifdef DEBUG assert(initialized_); @@ -82,7 +88,7 @@ template GetKey()); if (equal_(got, t.GetKey())) { out = i; return true; } if (equal_(got, invalid_)) { @@ -97,8 +103,6 @@ template bool UnsafeMutableFind(const Key key, MutableIterator &out) { #ifdef DEBUG @@ -224,6 +228,8 @@ template ; + template MutableIterator Ideal(const T &t) { return begin_ + (hash_(t.GetKey()) % buckets_); } @@ -247,6 +253,74 @@ template > class AutoProbing { + private: + typedef ProbingHashTable Backend; + public: + typedef EntryT Entry; + typedef typename Entry::Key Key; + typedef const Entry *ConstIterator; + typedef Entry *MutableIterator; + typedef HashT Hash; + typedef EqualT Equal; + + AutoProbing(std::size_t initial_size = 10, const Key &invalid = Key(), const Hash &hash_func = Hash(), const Equal &equal_func = Equal()) : + allocated_(Backend::Size(initial_size, 1.5)), mem_(util::MallocOrThrow(allocated_)), backend_(mem_.get(), allocated_, invalid, hash_func, equal_func) { + threshold_ = initial_size * 1.2; + } + + // Assumes that the key is unique. Multiple insertions won't cause a failure, just inconsistent lookup. + template MutableIterator Insert(const T &t) { + DoubleIfNeeded(); + return backend_.UncheckedInsert(t); + } + + template bool FindOrInsert(const T &t, MutableIterator &out) { + DoubleIfNeeded(); + return backend_.FindOrInsert(t, out); + } + + template bool UnsafeMutableFind(const Key key, MutableIterator &out) { + return backend_.UnsafeMutableFind(key, out); + } + + template MutableIterator UnsafeMutableMustFind(const Key key) { + return backend_.UnsafeMutableMustFind(key); + } + + template bool Find(const Key key, ConstIterator &out) const { + return backend_.Find(key, out); + } + + template ConstIterator MustFind(const Key key) const { + return backend_.MustFind(key); + } + + std::size_t Size() const { + return backend_.SizeNoSerialization(); + } + + void Clear() { + backend_.Clear(); + } + + private: + void DoubleIfNeeded() { + if (Size() < threshold_) + return; + mem_.call_realloc(backend_.DoubleTo()); + allocated_ = backend_.DoubleTo(); + backend_.Double(mem_.get()); + threshold_ *= 2; + } + + std::size_t allocated_; + util::scoped_malloc mem_; + Backend backend_; + std::size_t threshold_; +}; + } // namespace util #endif // UTIL_PROBING_HASH_TABLE__ -- cgit v1.2.3