diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-01-18 15:55:40 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-01-18 15:55:40 -0500 |
commit | bee6a3c3f6c54cf7449229488c6124dddc7e2f31 (patch) | |
tree | f407d57998b648d9341990d4a4de974b5104ed97 /klm/util | |
parent | 10715e9606500b34c87df9de8a39f8a7d6ecd04b (diff) |
new version of klm
Diffstat (limited to 'klm/util')
-rw-r--r-- | klm/util/Makefile.am | 3 | ||||
-rw-r--r-- | klm/util/bit_packing.hh | 1 | ||||
-rw-r--r-- | klm/util/mmap.cc | 4 | ||||
-rw-r--r-- | klm/util/mmap.hh | 2 | ||||
-rw-r--r-- | klm/util/probing_hash_table.hh | 22 | ||||
-rw-r--r-- | klm/util/scoped.hh | 30 | ||||
-rw-r--r-- | klm/util/string_piece.hh | 14 |
7 files changed, 62 insertions, 14 deletions
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index 9e38e0f1..f4f7d158 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -25,7 +25,6 @@ libklm_util_a_SOURCES = \ file_piece.cc \ mmap.cc \ murmur_hash.cc \ - scoped.cc \ - string_piece.cc + scoped.cc AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh index 0fd39d7f..636547b1 100644 --- a/klm/util/bit_packing.hh +++ b/klm/util/bit_packing.hh @@ -73,7 +73,6 @@ inline float ReadNonPositiveFloat31(const void *base, uint8_t bit) { return encoded.f; } inline void WriteNonPositiveFloat31(void *base, uint8_t bit, float value) { - assert(value <= 0.0); detail::FloatEnc encoded; encoded.f = value; encoded.i &= ~0x80000000; diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index 5a810c64..456ce953 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -77,6 +77,8 @@ void ReadAll(int fd, void *to_void, std::size_t amount) { } } +} // namespace + const int kFileFlags = #ifdef MAP_FILE MAP_FILE | MAP_SHARED @@ -85,8 +87,6 @@ const int kFileFlags = #endif ; -} // namespace - void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out) { switch (method) { case LAZY: diff --git a/klm/util/mmap.hh b/klm/util/mmap.hh index 0a504d89..e4439fa4 100644 --- a/klm/util/mmap.hh +++ b/klm/util/mmap.hh @@ -91,9 +91,11 @@ typedef enum { READ } LoadMethod; +extern const int kFileFlags; // Wrapper around mmap to check it worked and hide some platform macros. void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, off_t offset = 0); + void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out); void *MapAnonymous(std::size_t size); diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index c3529a7e..7b5cdc22 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -1,6 +1,8 @@ #ifndef UTIL_PROBING_HASH_TABLE__ #define UTIL_PROBING_HASH_TABLE__ +#include "util/exception.hh" + #include <algorithm> #include <cstddef> #include <functional> @@ -9,6 +11,13 @@ namespace util { +/* Thrown when table grows too large */ +class ProbingSizeException : public Exception { + public: + ProbingSizeException() throw() {} + ~ProbingSizeException() throw() {} +}; + /* Non-standard hash table * Buckets must be set at the beginning and must be greater than maximum number * of elements, else an infinite loop happens. @@ -33,9 +42,9 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac } // Must be assigned to later. - ProbingHashTable() + ProbingHashTable() : entries_(0) #ifdef DEBUG - : initialized_(false), entries_(0) + , initialized_(false) #endif {} @@ -45,17 +54,18 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac end_(begin_ + (allocated / Packing::kBytes)), invalid_(invalid), hash_(hash_func), - equal_(equal_func) + equal_(equal_func), + entries_(0) #ifdef DEBUG , initialized_(true), - entries_(0) #endif {} template <class T> void Insert(const T &t) { + if (++entries_ >= buckets_) + UTIL_THROW(ProbingSizeException, "Hash table with " << buckets_ << " buckets is full."); #ifdef DEBUG assert(initialized_); - assert(++entries_ < buckets_); #endif for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) { if (equal_(i->GetKey(), invalid_)) { *i = t; return; } @@ -86,9 +96,9 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac Key invalid_; Hash hash_; Equal equal_; + std::size_t entries_; #ifdef DEBUG bool initialized_; - std::size_t entries_; #endif }; diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh index 52864481..d36a7df3 100644 --- a/klm/util/scoped.hh +++ b/klm/util/scoped.hh @@ -20,7 +20,9 @@ template <class T, class R, R (*Free)(T*)> class scoped_thing { } T &operator*() { return *c_; } + const T&operator*() const { return *c_; } T &operator->() { return *c_; } + const T&operator->() const { return *c_; } T *get() { return c_; } const T *get() const { return c_; } @@ -80,6 +82,34 @@ class scoped_FILE { std::FILE *file_; }; +// Hat tip to boost. +template <class T> class scoped_array { + public: + explicit scoped_array(T *content = NULL) : c_(content) {} + + ~scoped_array() { delete [] c_; } + + T *get() { return c_; } + const T* get() const { return c_; } + + T &operator*() { return *c_; } + const T&operator*() const { return *c_; } + + T &operator->() { return *c_; } + const T&operator->() const { return *c_; } + + T &operator[](std::size_t idx) { return c_[idx]; } + const T &operator[](std::size_t idx) const { return c_[idx]; } + + void reset(T *to = NULL) { + scoped_array<T> other(c_); + c_ = to; + } + + private: + T *c_; +}; + } // namespace util #endif // UTIL_SCOPED__ diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index 3ac2f8a7..2583db5e 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -53,8 +53,13 @@ //Uncomment this line if you want boost hashing for your StringPieces. //#define HAVE_BOOST +#ifdef HAVE_BOOST +#include <boost/functional/hash/hash.hpp> +#endif // HAVE_BOOST + #include <cstring> #include <iosfwd> +#include <ostream> #ifdef HAVE_ICU #include <unicode/stringpiece.h> @@ -62,7 +67,6 @@ U_NAMESPACE_BEGIN #else #include <algorithm> -#include <ostream> #include <string> #include <string.h> @@ -228,10 +232,14 @@ inline bool operator>=(const StringPiece& x, const StringPiece& y) { } // allow StringPiece to be logged (needed for unit testing). -extern std::ostream& operator<<(std::ostream& o, const StringPiece& piece); +inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { + return o.write(piece.data(), static_cast<std::streamsize>(piece.size())); +} #ifdef HAVE_BOOST -size_t hash_value(const StringPiece &str); +inline size_t hash_value(const StringPiece &str) { + return boost::hash_range(str.data(), str.data() + str.length()); +} /* Support for lookup of StringPiece in boost::unordered_map<std::string> */ struct StringPieceCompatibleHash : public std::unary_function<const StringPiece &, size_t> { |