From 535d4016ec5179cb673b697c2e81500a2097924c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 18 Jun 2013 11:34:20 -0700 Subject: lazy dd880b4 including kenlm 6eef0f1 --- klm/util/double-conversion/utils.h | 6 +++++- klm/util/file.cc | 14 +++++++------- klm/util/pool.cc | 4 +++- klm/util/probing_hash_table.hh | 23 ++++++++++++++++++++++- klm/util/proxy_iterator.hh | 25 +++++++++++++++---------- klm/util/sized_iterator.hh | 21 ++++++++++++++++++--- klm/util/stream/chain.hh | 2 +- klm/util/usage.cc | 15 +++++++++++++++ 8 files changed, 86 insertions(+), 24 deletions(-) (limited to 'klm/util') diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h index 2bd71605..9ccb3b65 100644 --- a/klm/util/double-conversion/utils.h +++ b/klm/util/double-conversion/utils.h @@ -299,7 +299,11 @@ template inline Dest BitCast(const Source& source) { // Compile time assertion: sizeof(Dest) == sizeof(Source) // A compile error here means your Dest and Source have different sizes. - typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]; + typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1] +#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8 + __attribute__((unused)) +#endif + ; Dest dest; memmove(&dest, &source, sizeof(dest)); diff --git a/klm/util/file.cc b/klm/util/file.cc index c7d8e23b..bef04cb1 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -116,7 +116,7 @@ std::size_t GuardLarge(std::size_t size) { // The following operating systems have broken read/write/pread/pwrite that // only supports up to 2^31. #if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) - return std::min(static_cast(INT_MAX), size); + return std::min(static_cast(static_cast(-1)), size); #else return size; #endif @@ -209,7 +209,7 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) { #endif errno = 0; do { - ret = + ret = #if defined(_WIN32) || defined(_WIN64) _write #else @@ -229,7 +229,7 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) { } void FSyncOrThrow(int fd) { -// Apparently windows doesn't have fsync? +// Apparently windows doesn't have fsync? #if !defined(_WIN32) && !defined(_WIN64) UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing"); #endif @@ -248,7 +248,7 @@ template <> struct CheckOffT<8> { typedef CheckOffT::True IgnoredType; #endif -// Can't we all just get along? +// Can't we all just get along? void InternalSeek(int fd, int64_t off, int whence) { if ( #if defined(_WIN32) || defined(_WIN64) @@ -457,9 +457,9 @@ bool TryName(int fd, std::string &out) { std::ostringstream convert; convert << fd; name += convert.str(); - + struct stat sb; - if (-1 == lstat(name.c_str(), &sb)) + if (-1 == lstat(name.c_str(), &sb)) return false; out.resize(sb.st_size + 1); ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1); @@ -471,7 +471,7 @@ bool TryName(int fd, std::string &out) { } out.resize(ret); // Don't use the non-file names. - if (!out.empty() && out[0] != '/') + if (!out.empty() && out[0] != '/') return false; return true; #endif diff --git a/klm/util/pool.cc b/klm/util/pool.cc index 429ba158..db72a8ec 100644 --- a/klm/util/pool.cc +++ b/klm/util/pool.cc @@ -25,7 +25,9 @@ void Pool::FreeAll() { } void *Pool::More(std::size_t size) { - std::size_t amount = std::max(static_cast(32) << free_list_.size(), size); + // Double until we hit 2^21 (2 MB). Then grow in 2 MB blocks. + std::size_t desired_size = static_cast(32) << std::min(static_cast(16), free_list_.size()); + std::size_t amount = std::max(desired_size, size); uint8_t *ret = static_cast(MallocOrThrow(amount)); free_list_.push_back(ret); current_ = ret + size; diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 57866ff9..51a2944d 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -109,9 +109,20 @@ template MutableIterator UnsafeMutableMustFind(const Key key) { + for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) { + Key got(i->GetKey()); + if (equal_(got, key)) { return i; } + assert(!equal_(got, invalid_)); + if (++i == end_) i = begin_; + } } + template bool Find(const Key key, ConstIterator &out) const { #ifdef DEBUG assert(initialized_); @@ -124,6 +135,16 @@ template ConstIterator MustFind(const Key key) const { + for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) { + Key got(i->GetKey()); + if (equal_(got, key)) { return i; } + assert(!equal_(got, invalid_)); + if (++i == end_) i = begin_; + } + } + void Clear() { Entry invalid; invalid.SetKey(invalid_); diff --git a/klm/util/proxy_iterator.hh b/klm/util/proxy_iterator.hh index 121a45fa..0ee1716f 100644 --- a/klm/util/proxy_iterator.hh +++ b/klm/util/proxy_iterator.hh @@ -6,11 +6,11 @@ /* This is a RandomAccessIterator that uses a proxy to access the underlying * data. Useful for packing data at bit offsets but still using STL - * algorithms. + * algorithms. * * Normally I would use boost::iterator_facade but some people are too lazy to * install boost and still want to use my language model. It's amazing how - * many operators an iterator has. + * many operators an iterator has. * * The Proxy needs to provide: * class InnerIterator; @@ -22,15 +22,15 @@ * operator<(InnerIterator) * operator+=(std::ptrdiff_t) * operator-(InnerIterator) - * and of course whatever Proxy needs to dereference it. + * and of course whatever Proxy needs to dereference it. * - * It's also a good idea to specialize std::swap for Proxy. + * It's also a good idea to specialize std::swap for Proxy. */ namespace util { template class ProxyIterator { private: - // Self. + // Self. typedef ProxyIterator S; typedef typename Proxy::InnerIterator InnerIterator; @@ -38,16 +38,21 @@ template class ProxyIterator { typedef std::random_access_iterator_tag iterator_category; typedef typename Proxy::value_type value_type; typedef std::ptrdiff_t difference_type; - typedef Proxy reference; + typedef Proxy & reference; typedef Proxy * pointer; ProxyIterator() {} - // For cast from non const to const. + // For cast from non const to const. template ProxyIterator(const ProxyIterator &in) : p_(*in) {} explicit ProxyIterator(const Proxy &p) : p_(p) {} - // p_'s operator= does value copying, but here we want iterator copying. + // p_'s swap does value swapping, but here we want iterator swapping + friend inline void swap(ProxyIterator &first, ProxyIterator &second) { + swap(first.I(), second.I()); + } + + // p_'s operator= does value copying, but here we want iterator copying. S &operator=(const S &other) { I() = other.I(); return *this; @@ -72,8 +77,8 @@ template class ProxyIterator { std::ptrdiff_t operator-(const S &other) const { return I() - other.I(); } - Proxy operator*() { return p_; } - const Proxy operator*() const { return p_; } + Proxy &operator*() { return p_; } + const Proxy &operator*() const { return p_; } Proxy *operator->() { return &p_; } const Proxy *operator->() const { return &p_; } Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amount); } diff --git a/klm/util/sized_iterator.hh b/klm/util/sized_iterator.hh index cf998953..dce8f229 100644 --- a/klm/util/sized_iterator.hh +++ b/klm/util/sized_iterator.hh @@ -36,6 +36,11 @@ class SizedInnerIterator { void *Data() { return ptr_; } std::size_t EntrySize() const { return size_; } + friend inline void swap(SizedInnerIterator &first, SizedInnerIterator &second) { + std::swap(first.ptr_, second.ptr_); + std::swap(first.size_, second.size_); + } + private: uint8_t *ptr_; std::size_t size_; @@ -64,9 +69,19 @@ class SizedProxy { const void *Data() const { return inner_.Data(); } void *Data() { return inner_.Data(); } + /** + // TODO: this (deep) swap was recently added. why? if any std heap sort etc + // algs are using swap, that's going to be worse performance than using + // =. i'm not sure why we *want* a deep swap. if C++11 compilers are + // choosing between move constructor and swap, then we'd better implement a + // (deep) move constructor. it may also be that this is moot since i made + // ProxyIterator a reference and added a shallow ProxyIterator swap? (I + // need Ken or someone competent to judge whether that's correct also. - + // let me know at graehl@gmail.com + */ friend void swap(SizedProxy &first, SizedProxy &second) { std::swap_ranges( - static_cast(first.inner_.Data()), + static_cast(first.inner_.Data()), static_cast(first.inner_.Data()) + first.inner_.EntrySize(), static_cast(second.inner_.Data())); } @@ -87,7 +102,7 @@ typedef ProxyIterator SizedIterator; inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); } -// Useful wrapper for a comparison function i.e. sort. +// Useful wrapper for a comparison function i.e. sort. template class SizedCompare : public std::binary_function { public: explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {} @@ -106,7 +121,7 @@ template class SizedCompare : public } const Delegate &GetDelegate() const { return delegate_; } - + private: const Delegate delegate_; }; diff --git a/klm/util/stream/chain.hh b/klm/util/stream/chain.hh index 154b9b33..0cc83a85 100644 --- a/klm/util/stream/chain.hh +++ b/klm/util/stream/chain.hh @@ -122,7 +122,7 @@ class Chain { threads_.push_back(new Thread(Complete(), kRecycle)); } - Chain &operator>>(const Recycler &recycle) { + Chain &operator>>(const Recycler &) { CompleteLoop(); return *this; } diff --git a/klm/util/usage.cc b/klm/util/usage.cc index 5fa3cc9a..8db375e1 100644 --- a/klm/util/usage.cc +++ b/klm/util/usage.cc @@ -21,6 +21,21 @@ namespace util { #if !defined(_WIN32) && !defined(_WIN64) namespace { + +// On Mac OS X, clock_gettime is not implemented. +// CLOCK_MONOTONIC is not defined either. +#ifdef __MACH__ +#define CLOCK_MONOTONIC 0 + +int clock_gettime(int clk_id, struct timespec *tp) { + struct timeval tv; + gettimeofday(&tv, NULL); + tp->tv_sec = tv.tv_sec; + tp->tv_nsec = tv.tv_usec * 1000; + return 0; +} +#endif // __MACH__ + float FloatSec(const struct timeval &tv) { return static_cast(tv.tv_sec) + (static_cast(tv.tv_usec) / 1000000.0); } -- cgit v1.2.3