8 files changed, 139 insertions, 45 deletions
diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h
index 767094b8..9ccb3b65 100644
--- a/klm/util/double-conversion/utils.h
+++ b/klm/util/double-conversion/utils.h
@@ -218,7 +218,8 @@ class StringBuilder {
   // 0-characters; use the Finalize() method to terminate the string
   // instead.
   void AddCharacter(char c) {
-    ASSERT(c != '\0');
+    // The buffer is read out as raw data, not as a C string, so a '\0' is fine.
+    //ASSERT(c != '\0');
     ASSERT(!is_finalized() && position_ < buffer_.length());
     buffer_[position_++] = c;
   }
@@ -233,7 +234,8 @@ class StringBuilder {
   // builder. The input string must have enough characters.
   void AddSubstring(const char* s, int n) {
     ASSERT(!is_finalized() && position_ + n < buffer_.length());
-    ASSERT(static_cast<size_t>(n) <= strlen(s));
+    // The buffer is read out as raw data, not as a C string, so a '\0' is fine.
+    //ASSERT(static_cast<size_t>(n) <= strlen(s));
     memmove(&buffer_[position_], s, n * kCharSize);
     position_ += n;
   }
@@ -253,7 +255,8 @@ class StringBuilder {
     buffer_[position_] = '\0';
     // Make sure nobody managed to add a 0-character to the
     // buffer while building the string.
-    ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
+    // The buffer is read out as raw data, not as a C string, so a '\0' is fine.
+    //ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
     position_ = -1;
     ASSERT(is_finalized());
     return buffer_.start();
@@ -296,7 +299,11 @@ template <class Dest, class Source>
 inline Dest BitCast(const Source& source) {
   // Compile time assertion: sizeof(Dest) == sizeof(Source)
   // A compile error here means your Dest and Source have different sizes.
-  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
+  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
+#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+      __attribute__((unused))
+#endif
+      ;
 
   Dest dest;
   memmove(&dest, &source, sizeof(dest));
diff --git a/klm/util/file.cc b/klm/util/file.cc
index c7d8e23b..bef04cb1 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -116,7 +116,7 @@ std::size_t GuardLarge(std::size_t size) {
   // The following operating systems have broken read/write/pread/pwrite that
   // only supports up to 2^31.
 #if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID)
-  return std::min(static_cast<std::size_t>(INT_MAX), size);
+  return std::min(static_cast<std::size_t>(INT_MAX), size);  // stay within the 2^31 limit described above
 #else
   return size;
 #endif
@@ -209,7 +209,7 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
 #endif
     errno = 0;
     do {
-      ret = 
+      ret =
 #if defined(_WIN32) || defined(_WIN64)
         _write
 #else
@@ -229,7 +229,7 @@ void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
 }
 
 void FSyncOrThrow(int fd) {
-// Apparently windows doesn't have fsync?  
+// Apparently windows doesn't have fsync?
 #if !defined(_WIN32) && !defined(_WIN64)
   UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
 #endif
@@ -248,7 +248,7 @@ template <> struct CheckOffT<8> {
 typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
 #endif
 
-// Can't we all just get along?  
+// Can't we all just get along?
 void InternalSeek(int fd, int64_t off, int whence) {
   if (
 #if defined(_WIN32) || defined(_WIN64)
@@ -457,9 +457,9 @@ bool TryName(int fd, std::string &out) {
   std::ostringstream convert;
   convert << fd;
   name += convert.str();
-  
+
   struct stat sb;
-  if (-1 == lstat(name.c_str(), &sb)) 
+  if (-1 == lstat(name.c_str(), &sb))
     return false;
   out.resize(sb.st_size + 1);
   ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
@@ -471,7 +471,7 @@ bool TryName(int fd, std::string &out) {
   }
   out.resize(ret);
   // Don't use the non-file names.
-  if (!out.empty() && out[0] != '/') 
+  if (!out.empty() && out[0] != '/')
     return false;
   return true;
 #endif
diff --git a/klm/util/pool.cc b/klm/util/pool.cc
index 429ba158..db72a8ec 100644
--- a/klm/util/pool.cc
+++ b/klm/util/pool.cc
@@ -25,7 +25,9 @@ void Pool::FreeAll() {
 }
 
 void *Pool::More(std::size_t size) {
-  std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
+  // Double the block size with each block until it reaches 2^21 bytes (2 MB); then grow in 2 MB blocks.
+  std::size_t desired_size = static_cast<size_t>(32) << std::min(static_cast<std::size_t>(16), free_list_.size());
+  std::size_t amount = std::max(desired_size, size);
   uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
   free_list_.push_back(ret);
   current_ = ret + size;
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 57866ff9..51a2944d 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -109,9 +109,20 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
         if (equal_(got, key)) { out = i; return true; }
         if (equal_(got, invalid_)) return false;
         if (++i == end_) i = begin_;
-      }   
+      }
+    }
+
+    // Like UnsafeMutableFind, but the key must be there: a miss trips the assert or, with asserts off, never returns.
+    template <class Key> MutableIterator UnsafeMutableMustFind(const Key key) {
+       for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
+        Key got(i->GetKey());
+        if (equal_(got, key)) { return i; }
+        assert(!equal_(got, invalid_));
+        if (++i == end_) i = begin_;
+      }
     }
 
+
     template <class Key> bool Find(const Key key, ConstIterator &out) const {
 #ifdef DEBUG
       assert(initialized_);
@@ -124,6 +135,16 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
       }    
     }
 
+    // Like Find, but the key must be there: a miss trips the assert or, with asserts off, never returns.
+    template <class Key> ConstIterator MustFind(const Key key) const {
+      for (ConstIterator i(begin_ + (hash_(key) % buckets_));;) {
+        Key got(i->GetKey());
+        if (equal_(got, key)) { return i; }
+        assert(!equal_(got, invalid_));
+        if (++i == end_) i = begin_;
+      }
+    }
+
     void Clear() {
       Entry invalid;
       invalid.SetKey(invalid_);
diff --git a/klm/util/proxy_iterator.hh b/klm/util/proxy_iterator.hh
index 121a45fa..0ee1716f 100644
--- a/klm/util/proxy_iterator.hh
+++ b/klm/util/proxy_iterator.hh
@@ -6,11 +6,11 @@
 
 /* This is a RandomAccessIterator that uses a proxy to access the underlying
  * data.  Useful for packing data at bit offsets but still using STL
- * algorithms.  
+ * algorithms.
  *
  * Normally I would use boost::iterator_facade but some people are too lazy to
  * install boost and still want to use my language model.  It's amazing how
- * many operators an iterator has. 
+ * many operators an iterator has.
  *
  * The Proxy needs to provide:
  *   class InnerIterator;
@@ -22,15 +22,15 @@
  *   operator<(InnerIterator)
  *   operator+=(std::ptrdiff_t)
  *   operator-(InnerIterator)
- * and of course whatever Proxy needs to dereference it.  
+ * and of course whatever Proxy needs to dereference it.
  *
- * It's also a good idea to specialize std::swap for Proxy.  
+ * It's also a good idea to specialize std::swap for Proxy.
  */
 
 namespace util {
 template <class Proxy> class ProxyIterator {
   private:
-    // Self.  
+    // Self.
     typedef ProxyIterator<Proxy> S;
     typedef typename Proxy::InnerIterator InnerIterator;
 
@@ -38,16 +38,21 @@ template <class Proxy> class ProxyIterator {
     typedef std::random_access_iterator_tag iterator_category;
     typedef typename Proxy::value_type value_type;
     typedef std::ptrdiff_t difference_type;
-    typedef Proxy reference;
+    typedef Proxy & reference;
     typedef Proxy * pointer;
 
     ProxyIterator() {}
 
-    // For cast from non const to const.  
+    // For cast from non const to const.
     template <class AlternateProxy> ProxyIterator(const ProxyIterator<AlternateProxy> &in) : p_(*in) {}
     explicit ProxyIterator(const Proxy &p) : p_(p) {}
 
-    // p_'s operator= does value copying, but here we want iterator copying.  
+    // p_'s swap does value swapping, but here we want iterator swapping
+    friend inline void swap(ProxyIterator<Proxy> &first, ProxyIterator<Proxy> &second) {
+      swap(first.I(), second.I());
+    }
+
+    // p_'s operator= does value copying, but here we want iterator copying.
     S &operator=(const S &other) {
       I() = other.I();
       return *this;
@@ -72,8 +77,8 @@ template <class Proxy> class ProxyIterator {
 
     std::ptrdiff_t operator-(const S &other) const { return I() - other.I(); }
 
-    Proxy operator*() { return p_; }
-    const Proxy operator*() const { return p_; }
+    Proxy &operator*() { return p_; }
+    const Proxy &operator*() const { return p_; }
     Proxy *operator->() { return &p_; }
     const Proxy *operator->() const { return &p_; }
     Proxy operator[](std::ptrdiff_t amount) const { return *(*this + amount); }
diff --git a/klm/util/sized_iterator.hh b/klm/util/sized_iterator.hh
index cf998953..dce8f229 100644
--- a/klm/util/sized_iterator.hh
+++ b/klm/util/sized_iterator.hh
@@ -36,6 +36,11 @@ class SizedInnerIterator {
     void *Data() { return ptr_; }
     std::size_t EntrySize() const { return size_; }
 
+    friend inline void swap(SizedInnerIterator &first, SizedInnerIterator &second) {
+      std::swap(first.ptr_, second.ptr_);
+      std::swap(first.size_, second.size_);
+    }
+
   private:
     uint8_t *ptr_;
     std::size_t size_;
@@ -64,9 +69,19 @@ class SizedProxy {
     const void *Data() const { return inner_.Data(); }
     void *Data() { return inner_.Data(); }
 
+  /**
+     // TODO: this (deep) swap was added recently. Why? If any std algorithms
+     // (heap sort etc.) use swap, performance will be worse than with
+     // operator=. I'm not sure why we *want* a deep swap. If C++11 compilers
+     // choose between the move constructor and swap, we had better implement a
+     // (deep) move constructor. This may also be moot now that ProxyIterator
+     // dereferences to a reference and has a shallow ProxyIterator swap. (I
+     // need Ken or someone competent to judge whether that is correct too;
+     // let me know at graehl@gmail.com.)
+  */
     friend void swap(SizedProxy &first, SizedProxy &second) {
       std::swap_ranges(
-          static_cast<char*>(first.inner_.Data()), 
+          static_cast<char*>(first.inner_.Data()),
           static_cast<char*>(first.inner_.Data()) + first.inner_.EntrySize(),
           static_cast<char*>(second.inner_.Data()));
     }
@@ -87,7 +102,7 @@ typedef ProxyIterator<SizedProxy> SizedIterator;
 
 inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); }
 
-// Useful wrapper for a comparison function i.e. sort.  
+// Useful wrapper for a comparison function i.e. sort.
 template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> {
   public:
     explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {}
@@ -106,7 +121,7 @@ template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public
     }
 
     const Delegate &GetDelegate() const { return delegate_; }
-    
+
   private:
     const Delegate delegate_;
 };
diff --git a/klm/util/stream/chain.hh b/klm/util/stream/chain.hh
index 154b9b33..0cc83a85 100644
--- a/klm/util/stream/chain.hh
+++ b/klm/util/stream/chain.hh
@@ -122,7 +122,7 @@ class Chain {
       threads_.push_back(new Thread(Complete(), kRecycle));
     }
 
-    Chain &operator>>(const Recycler &recycle) {
+    Chain &operator>>(const Recycler &) {
       CompleteLoop();
       return *this;
     }
diff --git a/klm/util/usage.cc b/klm/util/usage.cc
index ad4dc7b4..8db375e1 100644
--- a/klm/util/usage.cc
+++ b/klm/util/usage.cc
@@ -5,51 +5,95 @@
 #include <fstream>
 #include <ostream>
 #include <sstream>
+#include <set>
+#include <string>
 
 #include <string.h>
 #include <ctype.h>
 #if !defined(_WIN32) && !defined(_WIN64)
 #include <sys/resource.h>
 #include <sys/time.h>
+#include <time.h>
 #include <unistd.h>
 #endif
 
 namespace util {
 
-namespace {
 #if !defined(_WIN32) && !defined(_WIN64)
+namespace {
+
+// On Mac OS X, clock_gettime is not implemented.
+// CLOCK_MONOTONIC is not defined either.
+#ifdef __MACH__
+#define CLOCK_MONOTONIC 0
+
+int clock_gettime(int clk_id, struct timespec *tp) {
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+  tp->tv_sec = tv.tv_sec;
+  tp->tv_nsec = tv.tv_usec * 1000;
+  return 0;
+}
+#endif // __MACH__
+
 float FloatSec(const struct timeval &tv) {
   return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
 }
-#endif
+double FloatSec(const struct timespec &tv) {  // double: float cannot hold clock seconds plus a sub-second fraction
+  return static_cast<double>(tv.tv_sec) + (static_cast<double>(tv.tv_nsec) / 1000000000.0);
+}
 
 const char *SkipSpaces(const char *at) {
-  for (; *at == ' '; ++at) {}
+  for (; *at == ' ' || *at == '\t'; ++at) {}
   return at;
 }
+
+class RecordStart {
+  public:
+    RecordStart() {
+      clock_gettime(CLOCK_MONOTONIC, &started_);
+    }
+
+    const struct timespec &Started() const {
+      return started_;
+    }
+
+  private:
+    struct timespec started_;
+};
+
+const RecordStart kRecordStart;
 } // namespace
+#endif
 
 void PrintUsage(std::ostream &out) {
 #if !defined(_WIN32) && !defined(_WIN64)
+  // Linux doesn't set memory usage in getrusage :-(
+  std::set<std::string> headers;
+  headers.insert("VmPeak:");
+  headers.insert("VmRSS:");
+  headers.insert("Name:");
+
+  std::ifstream status("/proc/self/status", std::ios::in);
+  std::string header, value;
+  while ((status >> header) && getline(status, value)) {
+    if (headers.find(header) != headers.end()) {
+      out << header << SkipSpaces(value.c_str()) << '\t';
+    }
+  }
+
   struct rusage usage;
-  if (getrusage(RUSAGE_SELF, &usage)) {
+  if (getrusage(RUSAGE_SELF, &usage)) {  // this process's own usage, consistent with /proc/self/status above
     perror("getrusage");
     return;
   }
-  out << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';
-  out << "CPU\t" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime)) << '\n';
-  // Linux doesn't set memory usage :-(.  
-  std::ifstream status("/proc/self/status", std::ios::in);
-  std::string line;
-  while (getline(status, line)) {
-    if (!strncmp(line.c_str(), "VmRSS:\t", 7)) {
-      out << "RSSCur\t" << SkipSpaces(line.c_str() + 7) << '\n';
-      break;
-    } else if (!strncmp(line.c_str(), "VmPeak:\t", 8)) {
-      out << "VmPeak\t" << SkipSpaces(line.c_str() + 8) << '\n';
-    }
-  }
-  out << "RSSMax\t" << usage.ru_maxrss << " kB" << '\n';
+  out << "RSSMax:" << usage.ru_maxrss << " kB" << '\t';
+  out << "user:" << FloatSec(usage.ru_utime) << "\tsys:" << FloatSec(usage.ru_stime) << '\t';
+  out << "CPU:" << (FloatSec(usage.ru_utime) + FloatSec(usage.ru_stime));
+
+  struct timespec current;
+  clock_gettime(CLOCK_MONOTONIC, &current);
+  out << "\treal:" << (FloatSec(current) - FloatSec(kRecordStart.Started())) << '\n';
 #endif
 }