summary refs log tree commit diff
path: root/klm/util
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-01-18 15:55:40 -0500
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-01-18 15:55:40 -0500
commit d04c0ca2d9df0e147239b18e90650ca8bd51d594 (patch)
tree 1e728067b0606870df89961d10922b4226e614bb /klm/util
parent b49944ee0e5f347a936df244a7c354a867c79c93 (diff)
new version of klm
Diffstat (limited to 'klm/util')
-rw-r--r-- klm/util/Makefile.am 3
-rw-r--r-- klm/util/bit_packing.hh 1
-rw-r--r-- klm/util/mmap.cc 4
-rw-r--r-- klm/util/mmap.hh 2
-rw-r--r-- klm/util/probing_hash_table.hh 22
-rw-r--r-- klm/util/scoped.hh 30
-rw-r--r-- klm/util/string_piece.hh 14
7 files changed, 62 insertions, 14 deletions
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am
index 9e38e0f1..f4f7d158 100644
--- a/klm/util/Makefile.am
+++ b/klm/util/Makefile.am
@@ -25,7 +25,6 @@ libklm_util_a_SOURCES = \
file_piece.cc \
mmap.cc \
murmur_hash.cc \
- scoped.cc \
- string_piece.cc
+ scoped.cc
AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh
index 0fd39d7f..636547b1 100644
--- a/klm/util/bit_packing.hh
+++ b/klm/util/bit_packing.hh
@@ -73,7 +73,6 @@ inline float ReadNonPositiveFloat31(const void *base, uint8_t bit) {
return encoded.f;
}
inline void WriteNonPositiveFloat31(void *base, uint8_t bit, float value) {
- assert(value <= 0.0);
detail::FloatEnc encoded;
encoded.f = value;
encoded.i &= ~0x80000000;
diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc
index 5a810c64..456ce953 100644
--- a/klm/util/mmap.cc
+++ b/klm/util/mmap.cc
@@ -77,6 +77,8 @@ void ReadAll(int fd, void *to_void, std::size_t amount) {
}
}
+} // namespace
+
const int kFileFlags =
#ifdef MAP_FILE
MAP_FILE | MAP_SHARED
@@ -85,8 +87,6 @@ const int kFileFlags =
#endif
;
-} // namespace
-
void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out) {
switch (method) {
case LAZY:
diff --git a/klm/util/mmap.hh b/klm/util/mmap.hh
index 0a504d89..e4439fa4 100644
--- a/klm/util/mmap.hh
+++ b/klm/util/mmap.hh
@@ -91,9 +91,11 @@ typedef enum {
READ
} LoadMethod;
+extern const int kFileFlags;
// Wrapper around mmap to check it worked and hide some platform macros.
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, off_t offset = 0);
+
void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out);
void *MapAnonymous(std::size_t size);
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index c3529a7e..7b5cdc22 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -1,6 +1,8 @@
#ifndef UTIL_PROBING_HASH_TABLE__
#define UTIL_PROBING_HASH_TABLE__
+#include "util/exception.hh"
+
#include <algorithm>
#include <cstddef>
#include <functional>
@@ -9,6 +11,13 @@
namespace util {
+/* Thrown by ProbingHashTable::Insert when the entry count reaches the bucket count (table is full). */
+class ProbingSizeException : public Exception {
+ public:
+ ProbingSizeException() throw() {}
+ ~ProbingSizeException() throw() {}
+};
+
/* Non-standard hash table
* Buckets must be set at the beginning and must be greater than maximum number
* of elements, else an infinite loop happens.
@@ -33,9 +42,9 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
}
// Must be assigned to later.
- ProbingHashTable()
+ ProbingHashTable() : entries_(0)
#ifdef DEBUG
- : initialized_(false), entries_(0)
+ , initialized_(false)
#endif
{}
@@ -45,17 +54,18 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
end_(begin_ + (allocated / Packing::kBytes)),
invalid_(invalid),
hash_(hash_func),
- equal_(equal_func)
+ equal_(equal_func),
+ entries_(0)
#ifdef DEBUG
, initialized_(true),
- entries_(0)
#endif
{}
template <class T> void Insert(const T &t) {
+ if (++entries_ >= buckets_)
+ UTIL_THROW(ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
#ifdef DEBUG
assert(initialized_);
- assert(++entries_ < buckets_);
#endif
for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
if (equal_(i->GetKey(), invalid_)) { *i = t; return; }
@@ -86,9 +96,9 @@ template <class PackingT, class HashT, class EqualT = std::equal_to<typename Pac
Key invalid_;
Hash hash_;
Equal equal_;
+ std::size_t entries_;
#ifdef DEBUG
bool initialized_;
- std::size_t entries_;
#endif
};
diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh
index 52864481..d36a7df3 100644
--- a/klm/util/scoped.hh
+++ b/klm/util/scoped.hh
@@ -20,7 +20,9 @@ template <class T, class R, R (*Free)(T*)> class scoped_thing {
}
T &operator*() { return *c_; }
+ const T&operator*() const { return *c_; }
T &operator->() { return *c_; }
+ const T&operator->() const { return *c_; }
T *get() { return c_; }
const T *get() const { return c_; }
@@ -80,6 +82,34 @@ class scoped_FILE {
std::FILE *file_;
};
+// Holds a T* and delete[]s it on destruction, so the pointer must come from new[] (or be NULL). Hat tip to boost.
+template <class T> class scoped_array {
+ public:
+ explicit scoped_array(T *content = NULL) : c_(content) {}
+ // Frees the held array with delete [].
+ ~scoped_array() { delete [] c_; }
+ // Raw access to the held pointer; this object keeps ownership.
+ T *get() { return c_; }
+ const T* get() const { return c_; }
+ // Dereference yields the first element; no NULL check is performed.
+ T &operator*() { return *c_; }
+ const T&operator*() const { return *c_; }
+ // NOTE(review): returns T& rather than T*; unless T itself provides operator->, `p->m` will not compile -- confirm intent.
+ T &operator->() { return *c_; }
+ const T&operator->() const { return *c_; }
+ // Unchecked element access.
+ T &operator[](std::size_t idx) { return c_[idx]; }
+ const T &operator[](std::size_t idx) const { return c_[idx]; }
+ // Hands the old pointer to a temporary scoped_array (which delete[]s it on scope exit), then stores `to`.
+ void reset(T *to = NULL) {
+ scoped_array<T> other(c_);
+ c_ = to;
+ }
+ // NOTE(review): copying is not disabled in this class; a copy would delete[] the same pointer twice.
+ private:
+ T *c_;
+};
+
} // namespace util
#endif // UTIL_SCOPED__
diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh
index 3ac2f8a7..2583db5e 100644
--- a/klm/util/string_piece.hh
+++ b/klm/util/string_piece.hh
@@ -53,8 +53,13 @@
//Uncomment this line if you want boost hashing for your StringPieces.
//#define HAVE_BOOST
+#ifdef HAVE_BOOST
+#include <boost/functional/hash/hash.hpp>
+#endif // HAVE_BOOST
+
#include <cstring>
#include <iosfwd>
+#include <ostream>
#ifdef HAVE_ICU
#include <unicode/stringpiece.h>
@@ -62,7 +67,6 @@ U_NAMESPACE_BEGIN
#else
#include <algorithm>
-#include <ostream>
#include <string>
#include <string.h>
@@ -228,10 +232,14 @@ inline bool operator>=(const StringPiece& x, const StringPiece& y) {
}
// allow StringPiece to be logged (needed for unit testing).
-extern std::ostream& operator<<(std::ostream& o, const StringPiece& piece);
+inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { // writes exactly piece.size() chars starting at piece.data()
+ return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
+}
#ifdef HAVE_BOOST
-size_t hash_value(const StringPiece &str);
+inline size_t hash_value(const StringPiece &str) { // only compiled under HAVE_BOOST; hashes the chars in [data(), data() + length())
+ return boost::hash_range(str.data(), str.data() + str.length());
+}
/* Support for lookup of StringPiece in boost::unordered_map<std::string> */
struct StringPieceCompatibleHash : public std::unary_function<const StringPiece &, size_t> {