From bee6a3c3f6c54cf7449229488c6124dddc7e2f31 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 18 Jan 2011 15:55:40 -0500 Subject: new version of klm --- klm/util/Makefile.am | 3 +-- klm/util/bit_packing.hh | 1 - klm/util/mmap.cc | 4 ++-- klm/util/mmap.hh | 2 ++ klm/util/probing_hash_table.hh | 22 ++++++++++++++++------ klm/util/scoped.hh | 30 ++++++++++++++++++++++++++++++ klm/util/string_piece.hh | 14 +++++++++++--- 7 files changed, 62 insertions(+), 14 deletions(-) (limited to 'klm/util') diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index 9e38e0f1..f4f7d158 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -25,7 +25,6 @@ libklm_util_a_SOURCES = \ file_piece.cc \ mmap.cc \ murmur_hash.cc \ - scoped.cc \ - string_piece.cc + scoped.cc AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh index 0fd39d7f..636547b1 100644 --- a/klm/util/bit_packing.hh +++ b/klm/util/bit_packing.hh @@ -73,7 +73,6 @@ inline float ReadNonPositiveFloat31(const void *base, uint8_t bit) { return encoded.f; } inline void WriteNonPositiveFloat31(void *base, uint8_t bit, float value) { - assert(value <= 0.0); detail::FloatEnc encoded; encoded.f = value; encoded.i &= ~0x80000000; diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index 5a810c64..456ce953 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -77,6 +77,8 @@ void ReadAll(int fd, void *to_void, std::size_t amount) { } } +} // namespace + const int kFileFlags = #ifdef MAP_FILE MAP_FILE | MAP_SHARED @@ -85,8 +87,6 @@ const int kFileFlags = #endif ; -} // namespace - void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out) { switch (method) { case LAZY: diff --git a/klm/util/mmap.hh b/klm/util/mmap.hh index 0a504d89..e4439fa4 100644 --- a/klm/util/mmap.hh +++ b/klm/util/mmap.hh @@ -91,9 +91,11 @@ typedef enum { READ } LoadMethod; +extern const int kFileFlags; // Wrapper around mmap to check it worked and hide some platform macros. void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, off_t offset = 0); + void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out); void *MapAnonymous(std::size_t size); diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index c3529a7e..7b5cdc22 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -1,6 +1,8 @@ #ifndef UTIL_PROBING_HASH_TABLE__ #define UTIL_PROBING_HASH_TABLE__ +#include "util/exception.hh" + #include #include #include @@ -9,6 +11,13 @@ namespace util { +/* Thrown when table grows too large */ +class ProbingSizeException : public Exception { + public: + ProbingSizeException() throw() {} + ~ProbingSizeException() throw() {} +}; + /* Non-standard hash table * Buckets must be set at the beginning and must be greater than maximum number * of elements, else an infinite loop happens. @@ -33,9 +42,9 @@ template void Insert(const T &t) { + if (++entries_ >= buckets_) + UTIL_THROW(ProbingSizeException, "Hash table with " << buckets_ << " buckets is full."); #ifdef DEBUG assert(initialized_); - assert(++entries_ < buckets_); #endif for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) { if (equal_(i->GetKey(), invalid_)) { *i = t; return; } @@ -86,9 +96,9 @@ template class scoped_thing { } T &operator*() { return *c_; } + const T&operator*() const { return *c_; } T &operator->() { return *c_; } + const T&operator->() const { return *c_; } T *get() { return c_; } const T *get() const { return c_; } @@ -80,6 +82,34 @@ class scoped_FILE { std::FILE *file_; }; +// Hat tip to boost. +template class scoped_array { + public: + explicit scoped_array(T *content = NULL) : c_(content) {} + + ~scoped_array() { delete [] c_; } + + T *get() { return c_; } + const T* get() const { return c_; } + + T &operator*() { return *c_; } + const T&operator*() const { return *c_; } + + T &operator->() { return *c_; } + const T&operator->() const { return *c_; } + + T &operator[](std::size_t idx) { return c_[idx]; } + const T &operator[](std::size_t idx) const { return c_[idx]; } + + void reset(T *to = NULL) { + scoped_array other(c_); + c_ = to; + } + + private: + T *c_; +}; + } // namespace util #endif // UTIL_SCOPED__ diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index 3ac2f8a7..2583db5e 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -53,8 +53,13 @@ //Uncomment this line if you want boost hashing for your StringPieces. //#define HAVE_BOOST +#ifdef HAVE_BOOST +#include +#endif // HAVE_BOOST + #include #include +#include #ifdef HAVE_ICU #include @@ -62,7 +67,6 @@ U_NAMESPACE_BEGIN #else #include -#include #include #include @@ -228,10 +232,14 @@ inline bool operator>=(const StringPiece& x, const StringPiece& y) { } // allow StringPiece to be logged (needed for unit testing). -extern std::ostream& operator<<(std::ostream& o, const StringPiece& piece); +inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { + return o.write(piece.data(), static_cast(piece.size())); +} #ifdef HAVE_BOOST -size_t hash_value(const StringPiece &str); +inline size_t hash_value(const StringPiece &str) { + return boost::hash_range(str.data(), str.data() + str.length()); +} /* Support for lookup of StringPiece in boost::unordered_map */ struct StringPieceCompatibleHash : public std::unary_function { -- cgit v1.2.3