summaryrefslogtreecommitdiff
path: root/klm/util
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2013-06-18 11:34:20 -0700
committerKenneth Heafield <github@kheafield.com>2013-06-18 11:34:20 -0700
commit535d4016ec5179cb673b697c2e81500a2097924c (patch)
tree4ae43b02d23317f37017a93fd12552b55c8d2a06 /klm/util
parent5dc790adc222db09c25b8be1b7a443a142f70180 (diff)
lazy dd880b4 including kenlm 6eef0f1
Diffstat (limited to 'klm/util')
-rw-r--r--klm/util/double-conversion/utils.h6
-rw-r--r--klm/util/file.cc14
-rw-r--r--klm/util/pool.cc4
-rw-r--r--klm/util/probing_hash_table.hh23
-rw-r--r--klm/util/proxy_iterator.hh25
-rw-r--r--klm/util/sized_iterator.hh21
-rw-r--r--klm/util/stream/chain.hh2
-rw-r--r--klm/util/usage.cc15
8 files changed, 86 insertions, 24 deletions
diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h
index 2bd71605..9ccb3b65 100644
--- a/klm/util/double-conversion/utils.h
+++ b/klm/util/double-conversion/utils.h
@@ -299,7 +299,11 @@ template <class Dest, class Source>
inline Dest BitCast(const Source& source) {
// Compile time assertion: sizeof(Dest) == sizeof(Source)
// A compile error here means your Dest and Source have different sizes.
- typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
+ typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
+#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+ __attribute__((unused))
+#endif
+ ;
Dest dest;
memmove(&dest, &source, sizeof(dest));
diff --git a/klm/util/file.cc b/klm/util/file.cc
index c7d8e23b..bef04cb1 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -116,7 +116,7 @@ std::size_t GuardLarge(std::size_t size) {
// The following operating systems have broken read/write/pread/pwrite that
// only supports up to 2^31.
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID)
- return std::min(static_cast<std::size_t>(INT_MAX), size);
+ return std::min(static_cast<std::size_t>(static_cast<unsigned>(-1)), size);
#else
return size;
#endif
@@ -209,7 +209,7 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
#endif
errno = 0;
do {
- ret =
+ ret =
#if defined(_WIN32) || defined(_WIN64)
_write
#else
@@ -229,7 +229,7 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
}
void FSyncOrThrow(int fd) {
-// Apparently windows doesn't have fsync?
+// Apparently windows doesn't have fsync?
#if !defined(_WIN32) && !defined(_WIN64)
UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
#endif
@@ -248,7 +248,7 @@ template <> struct CheckOffT<8> {
typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
#endif
-// Can't we all just get along?
+// Can't we all just get along?
void InternalSeek(int fd, int64_t off, int whence) {
if (
#if defined(_WIN32) || defined(_WIN64)
@@ -457,9 +457,9 @@ bool TryName(int fd, std::string &out) {
std::ostringstream convert;
convert << fd;
name += convert.str();
-
+
struct stat sb;
- if (-1 == lstat(name.c_str(), &sb))
+ if (-1 == lstat(name.c_str(), &sb))
return false;
out.resize(sb.st_size + 1);
ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
@@ -471,7 +471,7 @@ bool TryName(int fd, std::string &out) {
}
out.resize(ret);
// Don't use the non-file names.
- if (!out.empty() && out[0] != '/')
+ if (!out.empty() && out[0] != '/')
return false;
return true;
#endif
diff --git a/klm/util/pool.cc b/klm/util/pool.cc
index 429ba158..db72a8ec 100644
--- a/klm/util/pool.cc
+++ b/klm/util/pool.cc
@@ -25,7 +25,9 @@ void Pool::FreeAll() {
}
void *Pool::More(std::size_t size) {
- std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
+ // Double until we hit 2^21 (2 MB). Then grow in 2 MB blocks.
+ std::size_t desired_size = static_cast<size_t>(32) << std::min(static_cast<std::size_t>(16), free_list_.size());
+ std::size_t amount = std::max(desired_size, size);
uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
free_list_.push_back(ret);
current_ = ret + size;
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 57866ff9..51a2944d 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -109,9 +109,20 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
if (equal_(got, key)) { out = i; return true; }
if (equal_(got, invalid_)) return false;
if (++i == end_) i = begin_;
- }
+ }
+ }
+
+ // Like UnsafeMutableFind, but the key must be there.
+ template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
+ for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { return i; }
+ assert(!equal_(got, invalid_));
+ if (++i == end_) i = begin_;
+ }
}
+
template <class Key> bool Find(const Key key, ConstIterator &out) const {
#ifdef DEBUG
assert(initialized_);
@@ -124,6 +135,16 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
}
}
+ // Like Find but we're sure it must be there.
+ template <class Key> ConstIterator MustFind(const Key key) const {
+ for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
+ Key got(i->GetKey());
+ if (equal_(got, key)) { return i; }
+ assert(!equal_(got, invalid_));
+ if (++i == end_) i = begin_;
+ }
+ }
+
void Clear() {
Entry invalid;
invalid.SetKey(invalid_);
diff --git a/klm/util/proxy_iterator.hh b/klm/util/proxy_iterator.hh
index 121a45fa..0ee1716f 100644
--- a/klm/util/proxy_iterator.hh
+++ b/klm/util/proxy_iterator.hh
@@ -6,11 +6,11 @@
/* This is a RandomAccessIterator that uses a proxy to access the underlying
* data. Useful for packing data at bit offsets but still using STL
- * algorithms.
+ * algorithms.
*
* Normally I would use boost::iterator_facade but some people are too lazy to
* install boost and still want to use my language model. It's amazing how
- * many operators an iterator has.
+ * many operators an iterator has.
*
* The Proxy needs to provide:
* class InnerIterator;
@@ -22,15 +22,15 @@
* operator<(InnerIterator)
* operator+=(std::ptrdiff_t)
* operator-(InnerIterator)
- * and of course whatever Proxy needs to dereference it.
+ * and of course whatever Proxy needs to dereference it.
*
- * It's also a good idea to specialize std::swap for Proxy.
+ * It's also a good idea to specialize std::swap for Proxy.
*/
namespace util {
template <class Proxy> class ProxyIterator {
private:
- // Self.
+ // Self.
typedef ProxyIterator<Proxy> S;
typedef typename Proxy::InnerIterator InnerIterator;
@@ -38,16 +38,21 @@ template <class Proxy> class ProxyIterator {
typedef std::random_access_iterator_tag iterator_category;
typedef typename Proxy::value_type value_type;
typedef std::ptrdiff_t difference_type;
- typedef Proxy reference;
+ typedef Proxy & reference;
typedef Proxy * pointer;
ProxyIterator() {}
- // For cast from non const to const.
+ // For cast from non const to const.
template <class AlternateProxy> ProxyIterator(const ProxyIterator<AlternateProxy> &in) : p_(*in) {}
explicit ProxyIterator(const Proxy &p) : p_(p) {}
- // p_'s operator= does value copying, but here we want iterator copying.
+ // p_'s swap does value swapping, but here we want iterator swapping
+ friend inline void swap(ProxyIterator<Proxy> &first, ProxyIterator<Proxy> &second) {
+ swap(first.I(), second.I());
+ }
+
+ // p_'s operator= does value copying, but here we want iterator copying.
S &operator=(const S &other) {
I() = other.I();
return *this;
@@ -72,8 +77,8 @@ template <class Proxy> class ProxyIterator {
std::ptrdiff_t operator-(const S &other) const { return I() - other.I(); }
- Proxy operator*() { return p_; }
- const Proxy operator*() const { return p_; }
+ Proxy &operator*() { return p_; }
+ const Proxy &operator*() const { return p_; }
Proxy *operator->() { return &p_; }
const Proxy *operator->() const { return &p_; }
Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amount); }
diff --git a/klm/util/sized_iterator.hh b/klm/util/sized_iterator.hh
index cf998953..dce8f229 100644
--- a/klm/util/sized_iterator.hh
+++ b/klm/util/sized_iterator.hh
@@ -36,6 +36,11 @@ class SizedInnerIterator {
void *Data() { return ptr_; }
std::size_t EntrySize() const { return size_; }
+ friend inline void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
+ std::swap(first.ptr_, second.ptr_);
+ std::swap(first.size_, second.size_);
+ }
+
private:
uint8_t *ptr_;
std::size_t size_;
@@ -64,9 +69,19 @@ class SizedProxy {
const void *Data() const { return inner_.Data(); }
void *Data() { return inner_.Data(); }
+ /**
+ // TODO: this (deep) swap was recently added. why? if any std heap sort etc
+ // algs are using swap, that's going to be worse performance than using
+ // =. i'm not sure why we *want* a deep swap. if C++11 compilers are
+ // choosing between move constructor and swap, then we'd better implement a
+ // (deep) move constructor. it may also be that this is moot since i made
+ // ProxyIterator a reference and added a shallow ProxyIterator swap? (I
+ // need Ken or someone competent to judge whether that's correct also. -
+ // let me know at graehl@gmail.com
+ */
friend void swap(SizedProxy &first, SizedProxy &second) {
std::swap_ranges(
- static_cast<char*>(first.inner_.Data()),
+ static_cast<char*>(first.inner_.Data()),
static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
static_cast<char*>(second.inner_.Data()));
}
@@ -87,7 +102,7 @@ typedef ProxyIterator<SizedProxy> SizedIterator;
inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); }
-// Useful wrapper for a comparison function i.e. sort.
+// Useful wrapper for a comparison function i.e. sort.
template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> {
public:
explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {}
@@ -106,7 +121,7 @@ template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public
}
const Delegate &GetDelegate() const { return delegate_; }
-
+
private:
const Delegate delegate_;
};
diff --git a/klm/util/stream/chain.hh b/klm/util/stream/chain.hh
index 154b9b33..0cc83a85 100644
--- a/klm/util/stream/chain.hh
+++ b/klm/util/stream/chain.hh
@@ -122,7 +122,7 @@ class Chain {
threads_.push_back(new Thread(Complete(), kRecycle));
}
- Chain &operator>>(const Recycler &recycle) {
+ Chain &operator>>(const Recycler &) {
CompleteLoop();
return *this;
}
diff --git a/klm/util/usage.cc b/klm/util/usage.cc
index 5fa3cc9a..8db375e1 100644
--- a/klm/util/usage.cc
+++ b/klm/util/usage.cc
@@ -21,6 +21,21 @@ namespace util {
#if !defined(_WIN32) && !defined(_WIN64)
namespace {
+
+// On Mac OS X, clock_gettime is not implemented.
+// CLOCK_MONOTONIC is not defined either.
+#ifdef __MACH__
+#define CLOCK_MONOTONIC 0
+
+int clock_gettime(int clk_id, struct timespec *tp) {
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ tp->tv_sec = tv.tv_sec;
+ tp->tv_nsec = tv.tv_usec * 1000;
+ return 0;
+}
+#endif // __MACH__
+
float FloatSec(const struct timeval &tv) {
return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
}