68 files changed, 9626 insertions, 292 deletions
diff --git a/klm/util/Jamfile b/klm/util/Jamfile
deleted file mode 100644
index 3ee2c2c2..00000000
--- a/klm/util/Jamfile
+++ /dev/null
@@ -1,10 +0,0 @@
-lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : <include>.. : : <include>.. ;
-
-import testing ;
-
-unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ;
-run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ;
-unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ;
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am
index 5ceccf2c..7f873e96 100644
--- a/klm/util/Makefile.am
+++ b/klm/util/Makefile.am
@@ -19,6 +19,29 @@
 noinst_LIBRARIES = libklm_util.a
 
 libklm_util_a_SOURCES = \
+  bit_packing.hh \
+  ersatz_progress.hh \
+  exception.hh \
+  file.hh \
+  file_piece.hh \
+  getopt.hh \
+  have.hh \
+  joint_sort.hh \
+  mmap.hh \
+  murmur_hash.hh \
+  pcqueue.hh \
+  pool.hh \
+  probing_hash_table.hh \
+  proxy_iterator.hh \
+  read_compressed.hh \
+  scoped.hh \
+  sized_iterator.hh \
+  sorted_uniform.hh \
+  string_piece.hh \
+  string_piece_hash.hh \
+  thread_pool.hh \
+  tokenize_piece.hh \
+  usage.hh \
   ersatz_progress.cc \
   bit_packing.cc \
   exception.cc \
@@ -26,6 +49,10 @@ libklm_util_a_SOURCES = \
   file_piece.cc \
   mmap.cc \
   murmur_hash.cc \
+  pool.cc \
+	read_compressed.cc \
+  scoped.cc \
+  string_piece.cc \
 	usage.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I..
+AM_CPPFLAGS = -W -Wall -I$(top_srcdir)/klm -I$(top_srcdir)/klm/util/double-conversion
diff --git a/klm/util/double-conversion/LICENSE b/klm/util/double-conversion/LICENSE
new file mode 100644
index 00000000..933718a9
--- /dev/null
+++ b/klm/util/double-conversion/LICENSE
@@ -0,0 +1,26 @@
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/klm/util/double-conversion/Makefile.am b/klm/util/double-conversion/Makefile.am
new file mode 100644
index 00000000..dfcfb009
--- /dev/null
+++ b/klm/util/double-conversion/Makefile.am
@@ -0,0 +1,23 @@
+noinst_LIBRARIES = libklm_util_double.a
+
+libklm_util_double_a_SOURCES = \
+  bignum-dtoa.h \
+  bignum.h \
+  cached-powers.h \
+  diy-fp.h \
+  double-conversion.h \
+  fast-dtoa.h \
+  fixed-dtoa.h \
+  ieee.h \
+  strtod.h \
+  utils.h \
+  bignum.cc \
+  bignum-dtoa.cc \
+  cached-powers.cc \
+  diy-fp.cc \
+  double-conversion.cc \
+  fast-dtoa.cc \
+  fixed-dtoa.cc \
+  strtod.cc
+
+AM_CPPFLAGS = -W -Wall -I$(top_srcdir)/klm -I$(top_srcdir)/klm/util/double-conversion
diff --git a/klm/util/double-conversion/bignum-dtoa.cc b/klm/util/double-conversion/bignum-dtoa.cc
new file mode 100644
index 00000000..b6c2e85d
--- /dev/null
+++ b/klm/util/double-conversion/bignum-dtoa.cc
@@ -0,0 +1,640 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <math.h>
+
+#include "bignum-dtoa.h"
+
+#include "bignum.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+static int NormalizedExponent(uint64_t significand, int exponent) {
+  ASSERT(significand != 0);
+  while ((significand & Double::kHiddenBit) == 0) {
+    significand = significand << 1;
+    exponent = exponent - 1;
+  }
+  return exponent;
+}
+
+
+// Forward declarations:
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k.
+static int EstimatePower(int exponent);
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator.
+static void InitialScaledStartValues(uint64_t significand,
+                                     int exponent,
+                                     bool lower_boundary_is_closer,
+                                     int estimated_power,
+                                     bool need_boundary_deltas,
+                                     Bignum* numerator,
+                                     Bignum* denominator,
+                                     Bignum* delta_minus,
+                                     Bignum* delta_plus);
+// Multiplies numerator/denominator so that its values lies in the range 1-10.
+// Returns decimal_point s.t.
+//  v = numerator'/denominator' * 10^(decimal_point-1)
+//     where numerator' and denominator' are the values of numerator and
+//     denominator after the call to this function.
+static void FixupMultiply10(int estimated_power, bool is_even,
+                            int* decimal_point,
+                            Bignum* numerator, Bignum* denominator,
+                            Bignum* delta_minus, Bignum* delta_plus);
+// Generates digits from the left to the right and stops when the generated
+// digits yield the shortest decimal representation of v.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+                                   Bignum* delta_minus, Bignum* delta_plus,
+                                   bool is_even,
+                                   Vector<char> buffer, int* length);
+// Generates 'requested_digits' after the decimal point.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+                          Bignum* numerator, Bignum* denominator,
+                          Vector<char>(buffer), int* length);
+// Generates 'count' digits of numerator/denominator.
+// Once 'count' digits have been produced rounds the result depending on the
+// remainder (remainders of exactly .5 round upwards). Might update the
+// decimal_point when rounding up (for example for 0.9999).
+static void GenerateCountedDigits(int count, int* decimal_point,
+                                  Bignum* numerator, Bignum* denominator,
+                                  Vector<char>(buffer), int* length);
+
+
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+                Vector<char> buffer, int* length, int* decimal_point) {
+  ASSERT(v > 0);
+  ASSERT(!Double(v).IsSpecial());
+  uint64_t significand;
+  int exponent;
+  bool lower_boundary_is_closer;
+  if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) {
+    float f = static_cast<float>(v);
+    ASSERT(f == v);
+    significand = Single(f).Significand();
+    exponent = Single(f).Exponent();
+    lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser();
+  } else {
+    significand = Double(v).Significand();
+    exponent = Double(v).Exponent();
+    lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser();
+  }
+  bool need_boundary_deltas =
+      (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE);
+
+  bool is_even = (significand & 1) == 0;
+  int normalized_exponent = NormalizedExponent(significand, exponent);
+  // estimated_power might be too low by 1.
+  int estimated_power = EstimatePower(normalized_exponent);
+
+  // Shortcut for Fixed.
+  // The requested digits correspond to the digits after the point. If the
+  // number is much too small, then there is no need in trying to get any
+  // digits.
+  if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) {
+    buffer[0] = '\0';
+    *length = 0;
+    // Set decimal-point to -requested_digits. This is what Gay does.
+    // Note that it should not have any effect anyways since the string is
+    // empty.
+    *decimal_point = -requested_digits;
+    return;
+  }
+
+  Bignum numerator;
+  Bignum denominator;
+  Bignum delta_minus;
+  Bignum delta_plus;
+  // Make sure the bignum can grow large enough. The smallest double equals
+  // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
+  // The maximum double is 1.7976931348623157e308 which needs fewer than
+  // 308*4 binary digits.
+  ASSERT(Bignum::kMaxSignificantBits >= 324*4);
+  InitialScaledStartValues(significand, exponent, lower_boundary_is_closer,
+                           estimated_power, need_boundary_deltas,
+                           &numerator, &denominator,
+                           &delta_minus, &delta_plus);
+  // We now have v = (numerator / denominator) * 10^estimated_power.
+  FixupMultiply10(estimated_power, is_even, decimal_point,
+                  &numerator, &denominator,
+                  &delta_minus, &delta_plus);
+  // We now have v = (numerator / denominator) * 10^(decimal_point-1), and
+  //  1 <= (numerator + delta_plus) / denominator < 10
+  switch (mode) {
+    case BIGNUM_DTOA_SHORTEST:
+    case BIGNUM_DTOA_SHORTEST_SINGLE:
+      GenerateShortestDigits(&numerator, &denominator,
+                             &delta_minus, &delta_plus,
+                             is_even, buffer, length);
+      break;
+    case BIGNUM_DTOA_FIXED:
+      BignumToFixed(requested_digits, decimal_point,
+                    &numerator, &denominator,
+                    buffer, length);
+      break;
+    case BIGNUM_DTOA_PRECISION:
+      GenerateCountedDigits(requested_digits, decimal_point,
+                            &numerator, &denominator,
+                            buffer, length);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  buffer[*length] = '\0';
+}
+
+
+// The procedure starts generating digits from the left to the right and stops
+// when the generated digits yield the shortest decimal representation of v. A
+// decimal representation of v is a number lying closer to v than to any other
+// double, so it converts to v when read.
+//
+// This is true if d, the decimal representation, is between m- and m+, the
+// upper and lower boundaries. d must be strictly between them if !is_even.
+//           m- := (numerator - delta_minus) / denominator
+//           m+ := (numerator + delta_plus) / denominator
+//
+// Precondition: 0 <= (numerator+delta_plus) / denominator < 10.
+//   If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit
+//   will be produced. This should be the standard precondition.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+                                   Bignum* delta_minus, Bignum* delta_plus,
+                                   bool is_even,
+                                   Vector<char> buffer, int* length) {
+  // Small optimization: if delta_minus and delta_plus are the same just reuse
+  // one of the two bignums.
+  if (Bignum::Equal(*delta_minus, *delta_plus)) {
+    delta_plus = delta_minus;
+  }
+  *length = 0;
+  while (true) {
+    uint16_t digit;
+    digit = numerator->DivideModuloIntBignum(*denominator);
+    ASSERT(digit <= 9);  // digit is a uint16_t and therefore always positive.
+    // digit = numerator / denominator (integer division).
+    // numerator = numerator % denominator.
+    buffer[(*length)++] = digit + '0';
+
+    // Can we stop already?
+    // If the remainder of the division is less than the distance to the lower
+    // boundary we can stop. In this case we simply round down (discarding the
+    // remainder).
+    // Similarly we test if we can round up (using the upper boundary).
+    bool in_delta_room_minus;
+    bool in_delta_room_plus;
+    if (is_even) {
+      in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus);
+    } else {
+      in_delta_room_minus = Bignum::Less(*numerator, *delta_minus);
+    }
+    if (is_even) {
+      in_delta_room_plus =
+          Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+    } else {
+      in_delta_room_plus =
+          Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+    }
+    if (!in_delta_room_minus && !in_delta_room_plus) {
+      // Prepare for next iteration.
+      numerator->Times10();
+      delta_minus->Times10();
+      // We optimized delta_plus to be equal to delta_minus (if they share the
+      // same value). So don't multiply delta_plus if they point to the same
+      // object.
+      if (delta_minus != delta_plus) {
+        delta_plus->Times10();
+      }
+    } else if (in_delta_room_minus && in_delta_room_plus) {
+      // Let's see if 2*numerator < denominator.
+      // If yes, then the next digit would be < 5 and we can round down.
+      int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator);
+      if (compare < 0) {
+        // Remaining digits are less than .5. -> Round down (== do nothing).
+      } else if (compare > 0) {
+        // Remaining digits are more than .5 of denominator. -> Round up.
+        // Note that the last digit could not be a '9' as otherwise the whole
+        // loop would have stopped earlier.
+        // We still have an assert here in case the preconditions were not
+        // satisfied.
+        ASSERT(buffer[(*length) - 1] != '9');
+        buffer[(*length) - 1]++;
+      } else {
+        // Halfway case.
+        // TODO(floitsch): need a way to solve half-way cases.
+        //   For now let's round towards even (since this is what Gay seems to
+        //   do).
+
+        if ((buffer[(*length) - 1] - '0') % 2 == 0) {
+          // Round down => Do nothing.
+        } else {
+          ASSERT(buffer[(*length) - 1] != '9');
+          buffer[(*length) - 1]++;
+        }
+      }
+      return;
+    } else if (in_delta_room_minus) {
+      // Round down (== do nothing).
+      return;
+    } else {  // in_delta_room_plus
+      // Round up.
+      // Note again that the last digit could not be '9' since this would have
+      // stopped the loop earlier.
+      // We still have an ASSERT here, in case the preconditions were not
+      // satisfied.
+      ASSERT(buffer[(*length) -1] != '9');
+      buffer[(*length) - 1]++;
+      return;
+    }
+  }
+}
+
+
+// Let v = numerator / denominator < 10.
+// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point)
+// from left to right. Once 'count' digits have been produced we decide wether
+// to round up or down. Remainders of exactly .5 round upwards. Numbers such
+// as 9.999999 propagate a carry all the way, and change the
+// exponent (decimal_point), when rounding upwards.
+static void GenerateCountedDigits(int count, int* decimal_point,
+                                  Bignum* numerator, Bignum* denominator,
+                                  Vector<char>(buffer), int* length) {
+  ASSERT(count >= 0);
+  for (int i = 0; i < count - 1; ++i) {
+    uint16_t digit;
+    digit = numerator->DivideModuloIntBignum(*denominator);
+    ASSERT(digit <= 9);  // digit is a uint16_t and therefore always positive.
+    // digit = numerator / denominator (integer division).
+    // numerator = numerator % denominator.
+    buffer[i] = digit + '0';
+    // Prepare for next iteration.
+    numerator->Times10();
+  }
+  // Generate the last digit.
+  uint16_t digit;
+  digit = numerator->DivideModuloIntBignum(*denominator);
+  if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+    digit++;
+  }
+  buffer[count - 1] = digit + '0';
+  // Correct bad digits (in case we had a sequence of '9's). Propagate the
+  // carry until we hat a non-'9' or til we reach the first digit.
+  for (int i = count - 1; i > 0; --i) {
+    if (buffer[i] != '0' + 10) break;
+    buffer[i] = '0';
+    buffer[i - 1]++;
+  }
+  if (buffer[0] == '0' + 10) {
+    // Propagate a carry past the top place.
+    buffer[0] = '1';
+    (*decimal_point)++;
+  }
+  *length = count;
+}
+
+
+// Generates 'requested_digits' after the decimal point. It might omit
+// trailing '0's. If the input number is too small then no digits at all are
+// generated (ex.: 2 fixed digits for 0.00001).
+//
+// Input verifies:  1 <= (numerator + delta) / denominator < 10.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+                          Bignum* numerator, Bignum* denominator,
+                          Vector<char>(buffer), int* length) {
+  // Note that we have to look at more than just the requested_digits, since
+  // a number could be rounded up. Example: v=0.5 with requested_digits=0.
+  // Even though the power of v equals 0 we can't just stop here.
+  if (-(*decimal_point) > requested_digits) {
+    // The number is definitively too small.
+    // Ex: 0.001 with requested_digits == 1.
+    // Set decimal-point to -requested_digits. This is what Gay does.
+    // Note that it should not have any effect anyways since the string is
+    // empty.
+    *decimal_point = -requested_digits;
+    *length = 0;
+    return;
+  } else if (-(*decimal_point) == requested_digits) {
+    // We only need to verify if the number rounds down or up.
+    // Ex: 0.04 and 0.06 with requested_digits == 1.
+    ASSERT(*decimal_point == -requested_digits);
+    // Initially the fraction lies in range (1, 10]. Multiply the denominator
+    // by 10 so that we can compare more easily.
+    denominator->Times10();
+    if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+      // If the fraction is >= 0.5 then we have to include the rounded
+      // digit.
+      buffer[0] = '1';
+      *length = 1;
+      (*decimal_point)++;
+    } else {
+      // Note that we caught most of similar cases earlier.
+      *length = 0;
+    }
+    return;
+  } else {
+    // The requested digits correspond to the digits after the point.
+    // The variable 'needed_digits' includes the digits before the point.
+    int needed_digits = (*decimal_point) + requested_digits;
+    GenerateCountedDigits(needed_digits, decimal_point,
+                          numerator, denominator,
+                          buffer, length);
+  }
+}
+
+
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
+// v = f * 2^exponent and 2^52 <= f < 2^53.
+// v is hence a normalized double with the given exponent. The output is an
+// approximation for the exponent of the decimal approimation .digits * 10^k.
+//
+// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
+// Note: this property holds for v's upper boundary m+ too.
+//    10^k <= m+ < 10^k+1.
+//   (see explanation below).
+//
+// Examples:
+//  EstimatePower(0)   => 16
+//  EstimatePower(-52) => 0
+//
+// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0.
+static int EstimatePower(int exponent) {
+  // This function estimates log10 of v where v = f*2^e (with e == exponent).
+  // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)).
+  // Note that f is bounded by its container size. Let p = 53 (the double's
+  // significand size). Then 2^(p-1) <= f < 2^p.
+  //
+  // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close
+  // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)).
+  // The computed number undershoots by less than 0.631 (when we compute log3
+  // and not log10).
+  //
+  // Optimization: since we only need an approximated result this computation
+  // can be performed on 64 bit integers. On x86/x64 architecture the speedup is
+  // not really measurable, though.
+  //
+  // Since we want to avoid overshooting we decrement by 1e10 so that
+  // floating-point imprecisions don't affect us.
+  //
+  // Explanation for v's boundary m+: the computation takes advantage of
+  // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement
+  // (even for denormals where the delta can be much more important).
+
+  const double k1Log10 = 0.30102999566398114;  // 1/lg(10)
+
+  // For doubles len(f) == 53 (don't forget the hidden bit).
+  const int kSignificandSize = Double::kSignificandSize;
+  double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
+  return static_cast<int>(estimate);
+}
+
+
+// See comments for InitialScaledStartValues.
+static void InitialScaledStartValuesPositiveExponent(
+    uint64_t significand, int exponent,
+    int estimated_power, bool need_boundary_deltas,
+    Bignum* numerator, Bignum* denominator,
+    Bignum* delta_minus, Bignum* delta_plus) {
+  // A positive exponent implies a positive power.
+  ASSERT(estimated_power >= 0);
+  // Since the estimated_power is positive we simply multiply the denominator
+  // by 10^estimated_power.
+
+  // numerator = v.
+  numerator->AssignUInt64(significand);
+  numerator->ShiftLeft(exponent);
+  // denominator = 10^estimated_power.
+  denominator->AssignPowerUInt16(10, estimated_power);
+
+  if (need_boundary_deltas) {
+    // Introduce a common denominator so that the deltas to the boundaries are
+    // integers.
+    denominator->ShiftLeft(1);
+    numerator->ShiftLeft(1);
+    // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+    // denominator (of 2) delta_plus equals 2^e.
+    delta_plus->AssignUInt16(1);
+    delta_plus->ShiftLeft(exponent);
+    // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+    delta_minus->AssignUInt16(1);
+    delta_minus->ShiftLeft(exponent);
+  }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentPositivePower(
+    uint64_t significand, int exponent,
+    int estimated_power, bool need_boundary_deltas,
+    Bignum* numerator, Bignum* denominator,
+    Bignum* delta_minus, Bignum* delta_plus) {
+  // v = f * 2^e with e < 0, and with estimated_power >= 0.
+  // This means that e is close to 0 (have a look at how estimated_power is
+  // computed).
+
+  // numerator = significand
+  //  since v = significand * 2^exponent this is equivalent to
+  //  numerator = v * / 2^-exponent
+  numerator->AssignUInt64(significand);
+  // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
+  denominator->AssignPowerUInt16(10, estimated_power);
+  denominator->ShiftLeft(-exponent);
+
+  if (need_boundary_deltas) {
+    // Introduce a common denominator so that the deltas to the boundaries are
+    // integers.
+    denominator->ShiftLeft(1);
+    numerator->ShiftLeft(1);
+    // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+    // denominator (of 2) delta_plus equals 2^e.
+    // Given that the denominator already includes v's exponent the distance
+    // to the boundaries is simply 1.
+    delta_plus->AssignUInt16(1);
+    // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+    delta_minus->AssignUInt16(1);
+  }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentNegativePower(
+    uint64_t significand, int exponent,
+    int estimated_power, bool need_boundary_deltas,
+    Bignum* numerator, Bignum* denominator,
+    Bignum* delta_minus, Bignum* delta_plus) {
+  // Instead of multiplying the denominator with 10^estimated_power we
+  // multiply all values (numerator and deltas) by 10^-estimated_power.
+
+  // Use numerator as temporary container for power_ten.
+  Bignum* power_ten = numerator;
+  power_ten->AssignPowerUInt16(10, -estimated_power);
+
+  if (need_boundary_deltas) {
+    // Since power_ten == numerator we must make a copy of 10^estimated_power
+    // before we complete the computation of the numerator.
+    // delta_plus = delta_minus = 10^estimated_power
+    delta_plus->AssignBignum(*power_ten);
+    delta_minus->AssignBignum(*power_ten);
+  }
+
+  // numerator = significand * 2 * 10^-estimated_power
+  //  since v = significand * 2^exponent this is equivalent to
+  // numerator = v * 10^-estimated_power * 2 * 2^-exponent.
+  // Remember: numerator has been abused as power_ten. So no need to assign it
+  //  to itself.
+  ASSERT(numerator == power_ten);
+  numerator->MultiplyByUInt64(significand);
+
+  // denominator = 2 * 2^-exponent with exponent < 0.
+  denominator->AssignUInt16(1);
+  denominator->ShiftLeft(-exponent);
+
+  if (need_boundary_deltas) {
+    // Introduce a common denominator so that the deltas to the boundaries are
+    // integers.
+    numerator->ShiftLeft(1);
+    denominator->ShiftLeft(1);
+    // With this shift the boundaries have their correct value, since
+    // delta_plus = 10^-estimated_power, and
+    // delta_minus = 10^-estimated_power.
+    // These assignments have been done earlier.
+    // The adjustments if f == 2^p-1 (lower boundary is closer) are done later.
+  }
+}
+
+
+// Let v = significand * 2^exponent.
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator. The functions GenerateShortestDigits and
+// GenerateCountedDigits will then convert this ratio to its decimal
+// representation d, with the required accuracy.
+// Then d * 10^estimated_power is the representation of v.
+// (Note: the fraction and the estimated_power might get adjusted before
+// generating the decimal representation.)
+//
+// The initial start values consist of:
+//  - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power.
+//  - a scaled (common) denominator.
+//  optionally (used by GenerateShortestDigits to decide if it has the shortest
+//  decimal converting back to v):
+//  - v - m-: the distance to the lower boundary.
+//  - m+ - v: the distance to the upper boundary.
+//
+// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator.
+//
+// Let ep == estimated_power, then the returned values will satisfy:
+//  v / 10^ep = numerator / denominator.
+//  v's boundarys m- and m+:
+//    m- / 10^ep == v / 10^ep - delta_minus / denominator
+//    m+ / 10^ep == v / 10^ep + delta_plus / denominator
+//  Or in other words:
+//    m- == v - delta_minus * 10^ep / denominator;
+//    m+ == v + delta_plus * 10^ep / denominator;
+//
+// Since 10^(k-1) <= v < 10^k    (with k == estimated_power)
+//  or       10^k <= v < 10^(k+1)
+//  we then have 0.1 <= numerator/denominator < 1
+//           or    1 <= numerator/denominator < 10
+//
+// It is then easy to kickstart the digit-generation routine.
+//
+// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST
+// or BIGNUM_DTOA_SHORTEST_SINGLE.
+
+static void InitialScaledStartValues(uint64_t significand,
+                                     int exponent,
+                                     bool lower_boundary_is_closer,
+                                     int estimated_power,
+                                     bool need_boundary_deltas,
+                                     Bignum* numerator,
+                                     Bignum* denominator,
+                                     Bignum* delta_minus,
+                                     Bignum* delta_plus) {
+  if (exponent >= 0) {
+    InitialScaledStartValuesPositiveExponent(
+        significand, exponent, estimated_power, need_boundary_deltas,
+        numerator, denominator, delta_minus, delta_plus);
+  } else if (estimated_power >= 0) {
+    InitialScaledStartValuesNegativeExponentPositivePower(
+        significand, exponent, estimated_power, need_boundary_deltas,
+        numerator, denominator, delta_minus, delta_plus);
+  } else {
+    InitialScaledStartValuesNegativeExponentNegativePower(
+        significand, exponent, estimated_power, need_boundary_deltas,
+        numerator, denominator, delta_minus, delta_plus);
+  }
+
+  if (need_boundary_deltas && lower_boundary_is_closer) {
+    // The lower boundary is closer at half the distance of "normal" numbers.
+    // Increase the common denominator and adapt all but the delta_minus.
+    denominator->ShiftLeft(1);  // *2
+    numerator->ShiftLeft(1);    // *2
+    delta_plus->ShiftLeft(1);   // *2
+  }
+}
+
+
+// This routine multiplies numerator/denominator so that its values lies in the
+// range 1-10. That is after a call to this function we have:
+//    1 <= (numerator + delta_plus) /denominator < 10.
+// Let numerator the input before modification and numerator' the argument
+// after modification, then the output-parameter decimal_point is such that
+//  numerator / denominator * 10^estimated_power ==
+//    numerator' / denominator' * 10^(decimal_point - 1)
+// In some cases estimated_power was too low, and this is already the case. We
+// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k ==
+// estimated_power) but do not touch the numerator or denominator.
+// Otherwise the routine multiplies the numerator and the deltas by 10.
+static void FixupMultiply10(int estimated_power, bool is_even,
+                            int* decimal_point,
+                            Bignum* numerator, Bignum* denominator,
+                            Bignum* delta_minus, Bignum* delta_plus) {
+  bool in_range;
+  if (is_even) {
+    // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
+    // are rounded to the closest floating-point number with even significand.
+    in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+  } else {
+    in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+  }
+  if (in_range) {
+    // Since numerator + delta_plus >= denominator we already have
+    // 1 <= numerator/denominator < 10. Simply update the estimated_power.
+    *decimal_point = estimated_power + 1;
+  } else {
+    *decimal_point = estimated_power;
+    numerator->Times10();
+    if (Bignum::Equal(*delta_minus, *delta_plus)) {
+      delta_minus->Times10();
+      delta_plus->AssignBignum(*delta_minus);
+    } else {
+      delta_minus->Times10();
+      delta_plus->Times10();
+    }
+  }
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/bignum-dtoa.h b/klm/util/double-conversion/bignum-dtoa.h
new file mode 100644
index 00000000..34b96199
--- /dev/null
+++ b/klm/util/double-conversion/bignum-dtoa.h
@@ -0,0 +1,84 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+enum BignumDtoaMode {
+  // Return the shortest correct representation.
+  // For example the output of 0.299999999999999988897 is (the less accurate but
+  // correct) 0.3.
+  BIGNUM_DTOA_SHORTEST,
+  // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats.
+  BIGNUM_DTOA_SHORTEST_SINGLE,
+  // Return a fixed number of digits after the decimal point.
+  // For instance fixed(0.1, 4) becomes 0.1000
+  // If the input number is big, the output will be big.
+  BIGNUM_DTOA_FIXED,
+  // Return a fixed number of digits, no matter what the exponent is.
+  BIGNUM_DTOA_PRECISION
+};
+
+// Converts the given double 'v' to ascii.
+// The result should be interpreted as buffer * 10^(point-length).
+// The buffer will be null-terminated.
+//
+// The input v must be > 0 and different from NaN, and Infinity.
+//
+// The output depends on the given mode:
+//  - SHORTEST: produce the least amount of digits for which the internal
+//   identity requirement is still satisfied. If the digits are printed
+//   (together with the correct exponent) then reading this number will give
+//   'v' again. The buffer will choose the representation that is closest to
+//   'v'. If there are two at the same distance, than the number is round up.
+//   In this mode the 'requested_digits' parameter is ignored.
+//  - FIXED: produces digits necessary to print a given number with
+//   'requested_digits' digits after the decimal point. The produced digits
+//   might be too short in which case the caller has to fill the gaps with '0's.
+//   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
+//   Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns
+//     buffer="2", point=0.
+//   Note: the length of the returned buffer has no meaning wrt the significance
+//   of its digits. That is, just because it contains '0's does not mean that
+//   any other digit would not satisfy the internal identity requirement.
+//  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
+//   Even though the length of produced digits usually equals
+//   'requested_digits', the function is allowed to return fewer digits, in
+//   which case the caller has to fill the missing digits with '0's.
+//   Halfway cases are again rounded up.
+// 'BignumDtoa' expects the given buffer to be big enough to hold all digits
+// and a terminating null-character.
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+                Vector<char> buffer, int* length, int* point);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
diff --git a/klm/util/double-conversion/bignum.cc b/klm/util/double-conversion/bignum.cc
new file mode 100644
index 00000000..747491a0
--- /dev/null
+++ b/klm/util/double-conversion/bignum.cc
@@ -0,0 +1,764 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "bignum.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+Bignum::Bignum()
+    : bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) {
+  for (int i = 0; i < kBigitCapacity; ++i) {
+    bigits_[i] = 0;
+  }
+}
+
+
+template<typename S>
+static int BitSize(S value) {
+  return 8 * sizeof(value);
+}
+
+// Guaranteed to lie in one Bigit.
+void Bignum::AssignUInt16(uint16_t value) {
+  ASSERT(kBigitSize >= BitSize(value));
+  Zero();
+  if (value == 0) return;
+
+  EnsureCapacity(1);
+  bigits_[0] = value;
+  used_digits_ = 1;
+}
+
+
+void Bignum::AssignUInt64(uint64_t value) {
+  const int kUInt64Size = 64;
+
+  Zero();
+  if (value == 0) return;
+
+  int needed_bigits = kUInt64Size / kBigitSize + 1;
+  EnsureCapacity(needed_bigits);
+  for (int i = 0; i < needed_bigits; ++i) {
+    bigits_[i] = value & kBigitMask;
+    value = value >> kBigitSize;
+  }
+  used_digits_ = needed_bigits;
+  Clamp();
+}
+
+
+void Bignum::AssignBignum(const Bignum& other) {
+  exponent_ = other.exponent_;
+  for (int i = 0; i < other.used_digits_; ++i) {
+    bigits_[i] = other.bigits_[i];
+  }
+  // Clear the excess digits (if there were any).
+  for (int i = other.used_digits_; i < used_digits_; ++i) {
+    bigits_[i] = 0;
+  }
+  used_digits_ = other.used_digits_;
+}
+
+
+static uint64_t ReadUInt64(Vector<const char> buffer,
+                           int from,
+                           int digits_to_read) {
+  uint64_t result = 0;
+  for (int i = from; i < from + digits_to_read; ++i) {
+    int digit = buffer[i] - '0';
+    ASSERT(0 <= digit && digit <= 9);
+    result = result * 10 + digit;
+  }
+  return result;
+}
+
+
+void Bignum::AssignDecimalString(Vector<const char> value) {
+  // 2^64 = 18446744073709551616 > 10^19
+  const int kMaxUint64DecimalDigits = 19;
+  Zero();
+  int length = value.length();
+  int pos = 0;
+  // Let's just say that each digit needs 4 bits.
+  while (length >= kMaxUint64DecimalDigits) {
+    uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
+    pos += kMaxUint64DecimalDigits;
+    length -= kMaxUint64DecimalDigits;
+    MultiplyByPowerOfTen(kMaxUint64DecimalDigits);
+    AddUInt64(digits);
+  }
+  uint64_t digits = ReadUInt64(value, pos, length);
+  MultiplyByPowerOfTen(length);
+  AddUInt64(digits);
+  Clamp();
+}
+
+
+static int HexCharValue(char c) {
+  if ('0' <= c && c <= '9') return c - '0';
+  if ('a' <= c && c <= 'f') return 10 + c - 'a';
+  if ('A' <= c && c <= 'F') return 10 + c - 'A';
+  UNREACHABLE();
+  return 0;  // To make compiler happy.
+}
+
+
+void Bignum::AssignHexString(Vector<const char> value) {
+  Zero();
+  int length = value.length();
+
+  int needed_bigits = length * 4 / kBigitSize + 1;
+  EnsureCapacity(needed_bigits);
+  int string_index = length - 1;
+  for (int i = 0; i < needed_bigits - 1; ++i) {
+    // These bigits are guaranteed to be "full".
+    Chunk current_bigit = 0;
+    for (int j = 0; j < kBigitSize / 4; j++) {
+      current_bigit += HexCharValue(value[string_index--]) << (j * 4);
+    }
+    bigits_[i] = current_bigit;
+  }
+  used_digits_ = needed_bigits - 1;
+
+  Chunk most_significant_bigit = 0;  // Could be = 0;
+  for (int j = 0; j <= string_index; ++j) {
+    most_significant_bigit <<= 4;
+    most_significant_bigit += HexCharValue(value[j]);
+  }
+  if (most_significant_bigit != 0) {
+    bigits_[used_digits_] = most_significant_bigit;
+    used_digits_++;
+  }
+  Clamp();
+}
+
+
+void Bignum::AddUInt64(uint64_t operand) {
+  if (operand == 0) return;
+  Bignum other;
+  other.AssignUInt64(operand);
+  AddBignum(other);
+}
+
+
+void Bignum::AddBignum(const Bignum& other) {
+  ASSERT(IsClamped());
+  ASSERT(other.IsClamped());
+
+  // If this has a greater exponent than other append zero-bigits to this.
+  // After this call exponent_ <= other.exponent_.
+  Align(other);
+
+  // There are two possibilities:
+  //   aaaaaaaaaaa 0000  (where the 0s represent a's exponent)
+  //     bbbbb 00000000
+  //   ----------------
+  //   ccccccccccc 0000
+  // or
+  //    aaaaaaaaaa 0000
+  //  bbbbbbbbb 0000000
+  //  -----------------
+  //  cccccccccccc 0000
+  // In both cases we might need a carry bigit.
+
+  EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_);
+  Chunk carry = 0;
+  int bigit_pos = other.exponent_ - exponent_;
+  ASSERT(bigit_pos >= 0);
+  for (int i = 0; i < other.used_digits_; ++i) {
+    Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry;
+    bigits_[bigit_pos] = sum & kBigitMask;
+    carry = sum >> kBigitSize;
+    bigit_pos++;
+  }
+
+  while (carry != 0) {
+    Chunk sum = bigits_[bigit_pos] + carry;
+    bigits_[bigit_pos] = sum & kBigitMask;
+    carry = sum >> kBigitSize;
+    bigit_pos++;
+  }
+  used_digits_ = Max(bigit_pos, used_digits_);
+  ASSERT(IsClamped());
+}
+
+
+void Bignum::SubtractBignum(const Bignum& other) {
+  ASSERT(IsClamped());
+  ASSERT(other.IsClamped());
+  // We require this to be bigger than other.
+  ASSERT(LessEqual(other, *this));
+
+  Align(other);
+
+  int offset = other.exponent_ - exponent_;
+  Chunk borrow = 0;
+  int i;
+  for (i = 0; i < other.used_digits_; ++i) {
+    ASSERT((borrow == 0) || (borrow == 1));
+    Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow;
+    bigits_[i + offset] = difference & kBigitMask;
+    borrow = difference >> (kChunkSize - 1);
+  }
+  while (borrow != 0) {
+    Chunk difference = bigits_[i + offset] - borrow;
+    bigits_[i + offset] = difference & kBigitMask;
+    borrow = difference >> (kChunkSize - 1);
+    ++i;
+  }
+  Clamp();
+}
+
+
+void Bignum::ShiftLeft(int shift_amount) {
+  if (used_digits_ == 0) return;
+  exponent_ += shift_amount / kBigitSize;
+  int local_shift = shift_amount % kBigitSize;
+  EnsureCapacity(used_digits_ + 1);
+  BigitsShiftLeft(local_shift);
+}
+
+
+void Bignum::MultiplyByUInt32(uint32_t factor) {
+  if (factor == 1) return;
+  if (factor == 0) {
+    Zero();
+    return;
+  }
+  if (used_digits_ == 0) return;
+
+  // The product of a bigit with the factor is of size kBigitSize + 32.
+  // Assert that this number + 1 (for the carry) fits into double chunk.
+  ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1);
+  DoubleChunk carry = 0;
+  for (int i = 0; i < used_digits_; ++i) {
+    DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry;
+    bigits_[i] = static_cast<Chunk>(product & kBigitMask);
+    carry = (product >> kBigitSize);
+  }
+  while (carry != 0) {
+    EnsureCapacity(used_digits_ + 1);
+    bigits_[used_digits_] = carry & kBigitMask;
+    used_digits_++;
+    carry >>= kBigitSize;
+  }
+}
+
+
+void Bignum::MultiplyByUInt64(uint64_t factor) {
+  if (factor == 1) return;
+  if (factor == 0) {
+    Zero();
+    return;
+  }
+  ASSERT(kBigitSize < 32);
+  uint64_t carry = 0;
+  uint64_t low = factor & 0xFFFFFFFF;
+  uint64_t high = factor >> 32;
+  for (int i = 0; i < used_digits_; ++i) {
+    uint64_t product_low = low * bigits_[i];
+    uint64_t product_high = high * bigits_[i];
+    uint64_t tmp = (carry & kBigitMask) + product_low;
+    bigits_[i] = tmp & kBigitMask;
+    carry = (carry >> kBigitSize) + (tmp >> kBigitSize) +
+        (product_high << (32 - kBigitSize));
+  }
+  while (carry != 0) {
+    EnsureCapacity(used_digits_ + 1);
+    bigits_[used_digits_] = carry & kBigitMask;
+    used_digits_++;
+    carry >>= kBigitSize;
+  }
+}
+
+
+void Bignum::MultiplyByPowerOfTen(int exponent) {
+  const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d);
+  const uint16_t kFive1 = 5;
+  const uint16_t kFive2 = kFive1 * 5;
+  const uint16_t kFive3 = kFive2 * 5;
+  const uint16_t kFive4 = kFive3 * 5;
+  const uint16_t kFive5 = kFive4 * 5;
+  const uint16_t kFive6 = kFive5 * 5;
+  const uint32_t kFive7 = kFive6 * 5;
+  const uint32_t kFive8 = kFive7 * 5;
+  const uint32_t kFive9 = kFive8 * 5;
+  const uint32_t kFive10 = kFive9 * 5;
+  const uint32_t kFive11 = kFive10 * 5;
+  const uint32_t kFive12 = kFive11 * 5;
+  const uint32_t kFive13 = kFive12 * 5;
+  const uint32_t kFive1_to_12[] =
+      { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6,
+        kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 };
+
+  ASSERT(exponent >= 0);
+  if (exponent == 0) return;
+  if (used_digits_ == 0) return;
+
+  // We shift by exponent at the end just before returning.
+  int remaining_exponent = exponent;
+  while (remaining_exponent >= 27) {
+    MultiplyByUInt64(kFive27);
+    remaining_exponent -= 27;
+  }
+  while (remaining_exponent >= 13) {
+    MultiplyByUInt32(kFive13);
+    remaining_exponent -= 13;
+  }
+  if (remaining_exponent > 0) {
+    MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]);
+  }
+  ShiftLeft(exponent);
+}
+
+
+void Bignum::Square() {
+  ASSERT(IsClamped());
+  int product_length = 2 * used_digits_;
+  EnsureCapacity(product_length);
+
+  // Comba multiplication: compute each column separately.
+  // Example: r = a2a1a0 * b2b1b0.
+  //    r =  1    * a0b0 +
+  //        10    * (a1b0 + a0b1) +
+  //        100   * (a2b0 + a1b1 + a0b2) +
+  //        1000  * (a2b1 + a1b2) +
+  //        10000 * a2b2
+  //
+  // In the worst case we have to accumulate nb-digits products of digit*digit.
+  //
+  // Assert that the additional number of bits in a DoubleChunk are enough to
+  // sum up used_digits of Bigit*Bigit.
+  if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) {
+    UNIMPLEMENTED();
+  }
+  DoubleChunk accumulator = 0;
+  // First shift the digits so we don't overwrite them.
+  int copy_offset = used_digits_;
+  for (int i = 0; i < used_digits_; ++i) {
+    bigits_[copy_offset + i] = bigits_[i];
+  }
+  // We have two loops to avoid some 'if's in the loop.
+  for (int i = 0; i < used_digits_; ++i) {
+    // Process temporary digit i with power i.
+    // The sum of the two indices must be equal to i.
+    int bigit_index1 = i;
+    int bigit_index2 = 0;
+    // Sum all of the sub-products.
+    while (bigit_index1 >= 0) {
+      Chunk chunk1 = bigits_[copy_offset + bigit_index1];
+      Chunk chunk2 = bigits_[copy_offset + bigit_index2];
+      accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
+      bigit_index1--;
+      bigit_index2++;
+    }
+    bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
+    accumulator >>= kBigitSize;
+  }
+  for (int i = used_digits_; i < product_length; ++i) {
+    int bigit_index1 = used_digits_ - 1;
+    int bigit_index2 = i - bigit_index1;
+    // Invariant: sum of both indices is again equal to i.
+    // Inner loop runs 0 times on last iteration, emptying accumulator.
+    while (bigit_index2 < used_digits_) {
+      Chunk chunk1 = bigits_[copy_offset + bigit_index1];
+      Chunk chunk2 = bigits_[copy_offset + bigit_index2];
+      accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
+      bigit_index1--;
+      bigit_index2++;
+    }
+    // The overwritten bigits_[i] will never be read in further loop iterations,
+    // because bigit_index1 and bigit_index2 are always greater
+    // than i - used_digits_.
+    bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
+    accumulator >>= kBigitSize;
+  }
+  // Since the result was guaranteed to lie inside the number the
+  // accumulator must be 0 now.
+  ASSERT(accumulator == 0);
+
+  // Don't forget to update the used_digits and the exponent.
+  used_digits_ = product_length;
+  exponent_ *= 2;
+  Clamp();
+}
+
+
+void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) {
+  ASSERT(base != 0);
+  ASSERT(power_exponent >= 0);
+  if (power_exponent == 0) {
+    AssignUInt16(1);
+    return;
+  }
+  Zero();
+  int shifts = 0;
+  // We expect base to be in range 2-32, and most often to be 10.
+  // It does not make much sense to implement different algorithms for counting
+  // the bits.
+  while ((base & 1) == 0) {
+    base >>= 1;
+    shifts++;
+  }
+  int bit_size = 0;
+  int tmp_base = base;
+  while (tmp_base != 0) {
+    tmp_base >>= 1;
+    bit_size++;
+  }
+  int final_size = bit_size * power_exponent;
+  // 1 extra bigit for the shifting, and one for rounded final_size.
+  EnsureCapacity(final_size / kBigitSize + 2);
+
+  // Left to Right exponentiation.
+  int mask = 1;
+  while (power_exponent >= mask) mask <<= 1;
+
+  // The mask is now pointing to the bit above the most significant 1-bit of
+  // power_exponent.
+  // Get rid of first 1-bit;
+  mask >>= 2;
+  uint64_t this_value = base;
+
+  bool delayed_multipliciation = false;
+  const uint64_t max_32bits = 0xFFFFFFFF;
+  while (mask != 0 && this_value <= max_32bits) {
+    this_value = this_value * this_value;
+    // Verify that there is enough space in this_value to perform the
+    // multiplication.  The first bit_size bits must be 0.
+    if ((power_exponent & mask) != 0) {
+      uint64_t base_bits_mask =
+          ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1);
+      bool high_bits_zero = (this_value & base_bits_mask) == 0;
+      if (high_bits_zero) {
+        this_value *= base;
+      } else {
+        delayed_multipliciation = true;
+      }
+    }
+    mask >>= 1;
+  }
+  AssignUInt64(this_value);
+  if (delayed_multipliciation) {
+    MultiplyByUInt32(base);
+  }
+
+  // Now do the same thing as a bignum.
+  while (mask != 0) {
+    Square();
+    if ((power_exponent & mask) != 0) {
+      MultiplyByUInt32(base);
+    }
+    mask >>= 1;
+  }
+
+  // And finally add the saved shifts.
+  ShiftLeft(shifts * power_exponent);
+}
+
+
+// Precondition: this/other < 16bit.
+uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
+  ASSERT(IsClamped());
+  ASSERT(other.IsClamped());
+  ASSERT(other.used_digits_ > 0);
+
+  // Easy case: if we have less digits than the divisor than the result is 0.
+  // Note: this handles the case where this == 0, too.
+  if (BigitLength() < other.BigitLength()) {
+    return 0;
+  }
+
+  Align(other);
+
+  uint16_t result = 0;
+
+  // Start by removing multiples of 'other' until both numbers have the same
+  // number of digits.
+  while (BigitLength() > other.BigitLength()) {
+    // This naive approach is extremely inefficient if the this divided other
+    // might be big. This function is implemented for doubleToString where
+    // the result should be small (less than 10).
+    ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
+    // Remove the multiples of the first digit.
+    // Example this = 23 and other equals 9. -> Remove 2 multiples.
+    result += bigits_[used_digits_ - 1];
+    SubtractTimes(other, bigits_[used_digits_ - 1]);
+  }
+
+  ASSERT(BigitLength() == other.BigitLength());
+
+  // Both bignums are at the same length now.
+  // Since other has more than 0 digits we know that the access to
+  // bigits_[used_digits_ - 1] is safe.
+  Chunk this_bigit = bigits_[used_digits_ - 1];
+  Chunk other_bigit = other.bigits_[other.used_digits_ - 1];
+
+  if (other.used_digits_ == 1) {
+    // Shortcut for easy (and common) case.
+    int quotient = this_bigit / other_bigit;
+    bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
+    result += quotient;
+    Clamp();
+    return result;
+  }
+
+  int division_estimate = this_bigit / (other_bigit + 1);
+  result += division_estimate;
+  SubtractTimes(other, division_estimate);
+
+  if (other_bigit * (division_estimate + 1) > this_bigit) {
+    // No need to even try to subtract. Even if other's remaining digits were 0
+    // another subtraction would be too much.
+    return result;
+  }
+
+  while (LessEqual(other, *this)) {
+    SubtractBignum(other);
+    result++;
+  }
+  return result;
+}
+
+
+template<typename S>
+static int SizeInHexChars(S number) {
+  ASSERT(number > 0);
+  int result = 0;
+  while (number != 0) {
+    number >>= 4;
+    result++;
+  }
+  return result;
+}
+
+
+static char HexCharOfValue(int value) {
+  ASSERT(0 <= value && value <= 16);
+  if (value < 10) return value + '0';
+  return value - 10 + 'A';
+}
+
+
+bool Bignum::ToHexString(char* buffer, int buffer_size) const {
+  ASSERT(IsClamped());
+  // Each bigit must be printable as separate hex-character.
+  ASSERT(kBigitSize % 4 == 0);
+  const int kHexCharsPerBigit = kBigitSize / 4;
+
+  if (used_digits_ == 0) {
+    if (buffer_size < 2) return false;
+    buffer[0] = '0';
+    buffer[1] = '\0';
+    return true;
+  }
+  // We add 1 for the terminating '\0' character.
+  int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit +
+      SizeInHexChars(bigits_[used_digits_ - 1]) + 1;
+  if (needed_chars > buffer_size) return false;
+  int string_index = needed_chars - 1;
+  buffer[string_index--] = '\0';
+  for (int i = 0; i < exponent_; ++i) {
+    for (int j = 0; j < kHexCharsPerBigit; ++j) {
+      buffer[string_index--] = '0';
+    }
+  }
+  for (int i = 0; i < used_digits_ - 1; ++i) {
+    Chunk current_bigit = bigits_[i];
+    for (int j = 0; j < kHexCharsPerBigit; ++j) {
+      buffer[string_index--] = HexCharOfValue(current_bigit & 0xF);
+      current_bigit >>= 4;
+    }
+  }
+  // And finally the last bigit.
+  Chunk most_significant_bigit = bigits_[used_digits_ - 1];
+  while (most_significant_bigit != 0) {
+    buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF);
+    most_significant_bigit >>= 4;
+  }
+  return true;
+}
+
+
+Bignum::Chunk Bignum::BigitAt(int index) const {
+  if (index >= BigitLength()) return 0;
+  if (index < exponent_) return 0;
+  return bigits_[index - exponent_];
+}
+
+
+int Bignum::Compare(const Bignum& a, const Bignum& b) {
+  ASSERT(a.IsClamped());
+  ASSERT(b.IsClamped());
+  int bigit_length_a = a.BigitLength();
+  int bigit_length_b = b.BigitLength();
+  if (bigit_length_a < bigit_length_b) return -1;
+  if (bigit_length_a > bigit_length_b) return +1;
+  for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) {
+    Chunk bigit_a = a.BigitAt(i);
+    Chunk bigit_b = b.BigitAt(i);
+    if (bigit_a < bigit_b) return -1;
+    if (bigit_a > bigit_b) return +1;
+    // Otherwise they are equal up to this digit. Try the next digit.
+  }
+  return 0;
+}
+
+
+int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) {
+  ASSERT(a.IsClamped());
+  ASSERT(b.IsClamped());
+  ASSERT(c.IsClamped());
+  if (a.BigitLength() < b.BigitLength()) {
+    return PlusCompare(b, a, c);
+  }
+  if (a.BigitLength() + 1 < c.BigitLength()) return -1;
+  if (a.BigitLength() > c.BigitLength()) return +1;
+  // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than
+  // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one
+  // of 'a'.
+  if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) {
+    return -1;
+  }
+
+  Chunk borrow = 0;
+  // Starting at min_exponent all digits are == 0. So no need to compare them.
+  int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_);
+  for (int i = c.BigitLength() - 1; i >= min_exponent; --i) {
+    Chunk chunk_a = a.BigitAt(i);
+    Chunk chunk_b = b.BigitAt(i);
+    Chunk chunk_c = c.BigitAt(i);
+    Chunk sum = chunk_a + chunk_b;
+    if (sum > chunk_c + borrow) {
+      return +1;
+    } else {
+      borrow = chunk_c + borrow - sum;
+      if (borrow > 1) return -1;
+      borrow <<= kBigitSize;
+    }
+  }
+  if (borrow == 0) return 0;
+  return -1;
+}
+
+
+void Bignum::Clamp() {
+  while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) {
+    used_digits_--;
+  }
+  if (used_digits_ == 0) {
+    // Zero.
+    exponent_ = 0;
+  }
+}
+
+
+bool Bignum::IsClamped() const {
+  return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0;
+}
+
+
+void Bignum::Zero() {
+  for (int i = 0; i < used_digits_; ++i) {
+    bigits_[i] = 0;
+  }
+  used_digits_ = 0;
+  exponent_ = 0;
+}
+
+
+void Bignum::Align(const Bignum& other) {
+  if (exponent_ > other.exponent_) {
+    // If "X" represents a "hidden" digit (by the exponent) then we are in the
+    // following case (a == this, b == other):
+    // a:  aaaaaaXXXX   or a:   aaaaaXXX
+    // b:     bbbbbbX      b: bbbbbbbbXX
+    // We replace some of the hidden digits (X) of a with 0 digits.
+    // a:  aaaaaa000X   or a:   aaaaa0XX
+    int zero_digits = exponent_ - other.exponent_;
+    EnsureCapacity(used_digits_ + zero_digits);
+    for (int i = used_digits_ - 1; i >= 0; --i) {
+      bigits_[i + zero_digits] = bigits_[i];
+    }
+    for (int i = 0; i < zero_digits; ++i) {
+      bigits_[i] = 0;
+    }
+    used_digits_ += zero_digits;
+    exponent_ -= zero_digits;
+    ASSERT(used_digits_ >= 0);
+    ASSERT(exponent_ >= 0);
+  }
+}
+
+
+void Bignum::BigitsShiftLeft(int shift_amount) {
+  ASSERT(shift_amount < kBigitSize);
+  ASSERT(shift_amount >= 0);
+  Chunk carry = 0;
+  for (int i = 0; i < used_digits_; ++i) {
+    Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount);
+    bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask;
+    carry = new_carry;
+  }
+  if (carry != 0) {
+    bigits_[used_digits_] = carry;
+    used_digits_++;
+  }
+}
+
+
+void Bignum::SubtractTimes(const Bignum& other, int factor) {
+  ASSERT(exponent_ <= other.exponent_);
+  if (factor < 3) {
+    for (int i = 0; i < factor; ++i) {
+      SubtractBignum(other);
+    }
+    return;
+  }
+  Chunk borrow = 0;
+  int exponent_diff = other.exponent_ - exponent_;
+  for (int i = 0; i < other.used_digits_; ++i) {
+    DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i];
+    DoubleChunk remove = borrow + product;
+    Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask);
+    bigits_[i + exponent_diff] = difference & kBigitMask;
+    borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) +
+                                (remove >> kBigitSize));
+  }
+  for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) {
+    if (borrow == 0) return;
+    Chunk difference = bigits_[i] - borrow;
+    bigits_[i] = difference & kBigitMask;
+    borrow = difference >> (kChunkSize - 1);
+    ++i;
+  }
+  Clamp();
+}
+
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/bignum.h b/klm/util/double-conversion/bignum.h
new file mode 100644
index 00000000..5ec3544f
--- /dev/null
+++ b/klm/util/double-conversion/bignum.h
@@ -0,0 +1,145 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_BIGNUM_H_
+#define DOUBLE_CONVERSION_BIGNUM_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+class Bignum {
+ public:
+  // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
+  // This bignum can encode much bigger numbers, since it contains an
+  // exponent.
+  static const int kMaxSignificantBits = 3584;
+
+  Bignum();
+  void AssignUInt16(uint16_t value);
+  void AssignUInt64(uint64_t value);
+  void AssignBignum(const Bignum& other);
+
+  void AssignDecimalString(Vector<const char> value);
+  void AssignHexString(Vector<const char> value);
+
+  void AssignPowerUInt16(uint16_t base, int exponent);
+
+  void AddUInt16(uint16_t operand);
+  void AddUInt64(uint64_t operand);
+  void AddBignum(const Bignum& other);
+  // Precondition: this >= other.
+  void SubtractBignum(const Bignum& other);
+
+  void Square();
+  void ShiftLeft(int shift_amount);
+  void MultiplyByUInt32(uint32_t factor);
+  void MultiplyByUInt64(uint64_t factor);
+  void MultiplyByPowerOfTen(int exponent);
+  void Times10() { return MultiplyByUInt32(10); }
+  // Pseudocode:
+  //  int result = this / other;
+  //  this = this % other;
+  // In the worst case this function is in O(this/other).
+  uint16_t DivideModuloIntBignum(const Bignum& other);
+
+  bool ToHexString(char* buffer, int buffer_size) const;
+
+  // Returns
+  //  -1 if a < b,
+  //   0 if a == b, and
+  //  +1 if a > b.
+  static int Compare(const Bignum& a, const Bignum& b);
+  static bool Equal(const Bignum& a, const Bignum& b) {
+    return Compare(a, b) == 0;
+  }
+  static bool LessEqual(const Bignum& a, const Bignum& b) {
+    return Compare(a, b) <= 0;
+  }
+  static bool Less(const Bignum& a, const Bignum& b) {
+    return Compare(a, b) < 0;
+  }
+  // Returns Compare(a + b, c);
+  static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c);
+  // Returns a + b == c
+  static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
+    return PlusCompare(a, b, c) == 0;
+  }
+  // Returns a + b <= c
+  static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
+    return PlusCompare(a, b, c) <= 0;
+  }
+  // Returns a + b < c
+  static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) {
+    return PlusCompare(a, b, c) < 0;
+  }
+ private:
+  typedef uint32_t Chunk;
+  typedef uint64_t DoubleChunk;
+
+  static const int kChunkSize = sizeof(Chunk) * 8;
+  static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
+  // With bigit size of 28 we loose some bits, but a double still fits easily
+  // into two chunks, and more importantly we can use the Comba multiplication.
+  static const int kBigitSize = 28;
+  static const Chunk kBigitMask = (1 << kBigitSize) - 1;
+  // Every instance allocates kBigitLength chunks on the stack. Bignums cannot
+  // grow. There are no checks if the stack-allocated space is sufficient.
+  static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;
+
+  void EnsureCapacity(int size) {
+    if (size > kBigitCapacity) {
+      UNREACHABLE();
+    }
+  }
+  void Align(const Bignum& other);
+  void Clamp();
+  bool IsClamped() const;
+  void Zero();
+  // Requires this to have enough capacity (no tests done).
+  // Updates used_digits_ if necessary.
+  // shift_amount must be < kBigitSize.
+  void BigitsShiftLeft(int shift_amount);
+  // BigitLength includes the "hidden" digits encoded in the exponent.
+  int BigitLength() const { return used_digits_ + exponent_; }
+  Chunk BigitAt(int index) const;
+  void SubtractTimes(const Bignum& other, int factor);
+
+  Chunk bigits_buffer_[kBigitCapacity];
+  // A vector backed by bigits_buffer_. This way accesses to the array are
+  // checked for out-of-bounds errors.
+  Vector<Chunk> bigits_;
+  int used_digits_;
+  // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
+  int exponent_;
+
+  DISALLOW_COPY_AND_ASSIGN(Bignum);
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_BIGNUM_H_
diff --git a/klm/util/double-conversion/cached-powers.cc b/klm/util/double-conversion/cached-powers.cc
new file mode 100644
index 00000000..c6764291
--- /dev/null
+++ b/klm/util/double-conversion/cached-powers.cc
@@ -0,0 +1,175 @@
+// Copyright 2006-2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdarg.h>
+#include <limits.h>
+#include <math.h>
+
+#include "utils.h"
+
+#include "cached-powers.h"
+
+namespace double_conversion {
+
+struct CachedPower {
+  uint64_t significand;
+  int16_t binary_exponent;
+  int16_t decimal_exponent;
+};
+
+static const CachedPower kCachedPowers[] = {
+  {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
+  {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
+  {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
+  {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
+  {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
+  {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
+  {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
+  {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
+  {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
+  {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
+  {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
+  {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
+  {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
+  {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
+  {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
+  {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
+  {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
+  {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
+  {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
+  {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
+  {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
+  {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
+  {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
+  {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
+  {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
+  {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
+  {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
+  {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
+  {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
+  {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
+  {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
+  {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
+  {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
+  {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
+  {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
+  {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
+  {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
+  {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
+  {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
+  {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
+  {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
+  {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
+  {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
+  {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
+  {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
+  {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
+  {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
+  {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
+  {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
+  {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
+  {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
+  {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
+  {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
+  {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
+  {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
+  {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
+  {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
+  {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
+  {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
+  {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
+  {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
+  {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
+  {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
+  {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
+  {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
+  {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
+  {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
+  {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
+  {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
+  {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
+  {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
+  {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
+  {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
+  {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
+  {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
+  {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
+  {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
+  {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
+  {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
+  {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
+  {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
+  {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
+  {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
+  {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
+  {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
+  {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
+  {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
+};
+
+static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
+static const int kCachedPowersOffset = 348;  // -1 * the first decimal_exponent.
+static const double kD_1_LOG2_10 = 0.30102999566398114;  //  1 / lg(10)
+// Difference between the decimal exponents in the table above.
+const int PowersOfTenCache::kDecimalExponentDistance = 8;
+const int PowersOfTenCache::kMinDecimalExponent = -348;
+const int PowersOfTenCache::kMaxDecimalExponent = 340;
+
+void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+    int min_exponent,
+    int max_exponent,
+    DiyFp* power,
+    int* decimal_exponent) {
+  int kQ = DiyFp::kSignificandSize;
+  double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
+  int foo = kCachedPowersOffset;
+  int index =
+      (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
+  ASSERT(0 <= index && index < kCachedPowersLength);
+  CachedPower cached_power = kCachedPowers[index];
+  ASSERT(min_exponent <= cached_power.binary_exponent);
+  ASSERT(cached_power.binary_exponent <= max_exponent);
+  *decimal_exponent = cached_power.decimal_exponent;
+  *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+}
+
+
+void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
+                                                        DiyFp* power,
+                                                        int* found_exponent) {
+  ASSERT(kMinDecimalExponent <= requested_exponent);
+  ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
+  int index =
+      (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
+  CachedPower cached_power = kCachedPowers[index];
+  *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+  *found_exponent = cached_power.decimal_exponent;
+  ASSERT(*found_exponent <= requested_exponent);
+  ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/cached-powers.h b/klm/util/double-conversion/cached-powers.h
new file mode 100644
index 00000000..61a50614
--- /dev/null
+++ b/klm/util/double-conversion/cached-powers.h
@@ -0,0 +1,64 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_
+#define DOUBLE_CONVERSION_CACHED_POWERS_H_
+
+#include "diy-fp.h"
+
+namespace double_conversion {
+
+class PowersOfTenCache {
+ public:
+
+  // Not all powers of ten are cached. The decimal exponent of two neighboring
+  // cached numbers will differ by kDecimalExponentDistance.
+  static const int kDecimalExponentDistance;
+
+  static const int kMinDecimalExponent;
+  static const int kMaxDecimalExponent;
+
+  // Returns a cached power-of-ten with a binary exponent in the range
+  // [min_exponent; max_exponent] (boundaries included).
+  static void GetCachedPowerForBinaryExponentRange(int min_exponent,
+                                                   int max_exponent,
+                                                   DiyFp* power,
+                                                   int* decimal_exponent);
+
+  // Returns a cached power of ten x ~= 10^k such that
+  //   k <= decimal_exponent < k + kCachedPowersDecimalDistance.
+  // The given decimal_exponent must satisfy
+  //   kMinDecimalExponent <= requested_exponent, and
+  //   requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance.
+  static void GetCachedPowerForDecimalExponent(int requested_exponent,
+                                               DiyFp* power,
+                                               int* found_exponent);
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_CACHED_POWERS_H_
diff --git a/klm/util/double-conversion/diy-fp.cc b/klm/util/double-conversion/diy-fp.cc
new file mode 100644
index 00000000..ddd1891b
--- /dev/null
+++ b/klm/util/double-conversion/diy-fp.cc
@@ -0,0 +1,57 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#include "diy-fp.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+void DiyFp::Multiply(const DiyFp& other) {
+  // Simply "emulates" a 128 bit multiplication.
+  // However: the resulting number only contains 64 bits. The least
+  // significant 64 bits are only used for rounding the most significant 64
+  // bits.
+  const uint64_t kM32 = 0xFFFFFFFFU;
+  uint64_t a = f_ >> 32;
+  uint64_t b = f_ & kM32;
+  uint64_t c = other.f_ >> 32;
+  uint64_t d = other.f_ & kM32;
+  uint64_t ac = a * c;
+  uint64_t bc = b * c;
+  uint64_t ad = a * d;
+  uint64_t bd = b * d;
+  uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
+  // By adding 1U << 31 to tmp we round the final result.
+  // Halfway cases will be round up.
+  tmp += 1U << 31;
+  uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
+  e_ += other.e_ + 64;
+  f_ = result_f;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/diy-fp.h b/klm/util/double-conversion/diy-fp.h
new file mode 100644
index 00000000..9dcf8fbd
--- /dev/null
+++ b/klm/util/double-conversion/diy-fp.h
@@ -0,0 +1,118 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DIY_FP_H_
+#define DOUBLE_CONVERSION_DIY_FP_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// This "Do It Yourself Floating Point" class implements a floating-point number
+// with a uint64 significand and an int exponent. Normalized DiyFp numbers will
+// have the most significant bit of the significand set.
+// Multiplication and Subtraction do not normalize their results.
+// DiyFp are not designed to contain special doubles (NaN and Infinity).
+class DiyFp {
+ public:
+  static const int kSignificandSize = 64;
+
+  DiyFp() : f_(0), e_(0) {}
+  DiyFp(uint64_t f, int e) : f_(f), e_(e) {}
+
+  // this = this - other.
+  // The exponents of both numbers must be the same and the significand of this
+  // must be bigger than the significand of other.
+  // The result will not be normalized.
+  void Subtract(const DiyFp& other) {
+    ASSERT(e_ == other.e_);
+    ASSERT(f_ >= other.f_);
+    f_ -= other.f_;
+  }
+
+  // Returns a - b.
+  // The exponents of both numbers must be the same and this must be bigger
+  // than other. The result will not be normalized.
+  static DiyFp Minus(const DiyFp& a, const DiyFp& b) {
+    DiyFp result = a;
+    result.Subtract(b);
+    return result;
+  }
+
+
+  // this = this * other.
+  void Multiply(const DiyFp& other);
+
+  // returns a * b;
+  static DiyFp Times(const DiyFp& a, const DiyFp& b) {
+    DiyFp result = a;
+    result.Multiply(b);
+    return result;
+  }
+
+  void Normalize() {
+    ASSERT(f_ != 0);
+    uint64_t f = f_;
+    int e = e_;
+
+    // This method is mainly called for normalizing boundaries. In general
+    // boundaries need to be shifted by 10 bits. We thus optimize for this case.
+    const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
+    while ((f & k10MSBits) == 0) {
+      f <<= 10;
+      e -= 10;
+    }
+    while ((f & kUint64MSB) == 0) {
+      f <<= 1;
+      e--;
+    }
+    f_ = f;
+    e_ = e;
+  }
+
+  static DiyFp Normalize(const DiyFp& a) {
+    DiyFp result = a;
+    result.Normalize();
+    return result;
+  }
+
+  uint64_t f() const { return f_; }
+  int e() const { return e_; }
+
+  void set_f(uint64_t new_value) { f_ = new_value; }
+  void set_e(int new_value) { e_ = new_value; }
+
+ private:
+  static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000);
+
+  uint64_t f_;
+  int e_;
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_DIY_FP_H_
diff --git a/klm/util/double-conversion/double-conversion.cc b/klm/util/double-conversion/double-conversion.cc
new file mode 100644
index 00000000..febba6cd
--- /dev/null
+++ b/klm/util/double-conversion/double-conversion.cc
@@ -0,0 +1,889 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <limits.h>
+#include <math.h>
+
+#include "double-conversion.h"
+
+#include "bignum-dtoa.h"
+#include "fast-dtoa.h"
+#include "fixed-dtoa.h"
+#include "ieee.h"
+#include "strtod.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
+  int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
+  static DoubleToStringConverter converter(flags,
+                                           "Infinity",
+                                           "NaN",
+                                           'e',
+                                           -6, 21,
+                                           6, 0);
+  return converter;
+}
+
+
+bool DoubleToStringConverter::HandleSpecialValues(
+    double value,
+    StringBuilder* result_builder) const {
+  Double double_inspect(value);
+  if (double_inspect.IsInfinite()) {
+    if (infinity_symbol_ == NULL) return false;
+    if (value < 0) {
+      result_builder->AddCharacter('-');
+    }
+    result_builder->AddString(infinity_symbol_);
+    return true;
+  }
+  if (double_inspect.IsNan()) {
+    if (nan_symbol_ == NULL) return false;
+    result_builder->AddString(nan_symbol_);
+    return true;
+  }
+  return false;
+}
+
+
+void DoubleToStringConverter::CreateExponentialRepresentation(
+    const char* decimal_digits,
+    int length,
+    int exponent,
+    StringBuilder* result_builder) const {
+  ASSERT(length != 0);
+  result_builder->AddCharacter(decimal_digits[0]);
+  if (length != 1) {
+    result_builder->AddCharacter('.');
+    result_builder->AddSubstring(&decimal_digits[1], length-1);
+  }
+  result_builder->AddCharacter(exponent_character_);
+  if (exponent < 0) {
+    result_builder->AddCharacter('-');
+    exponent = -exponent;
+  } else {
+    if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
+      result_builder->AddCharacter('+');
+    }
+  }
+  if (exponent == 0) {
+    result_builder->AddCharacter('0');
+    return;
+  }
+  ASSERT(exponent < 1e4);
+  const int kMaxExponentLength = 5;
+  char buffer[kMaxExponentLength + 1];
+  buffer[kMaxExponentLength] = '\0';
+  int first_char_pos = kMaxExponentLength;
+  while (exponent > 0) {
+    buffer[--first_char_pos] = '0' + (exponent % 10);
+    exponent /= 10;
+  }
+  result_builder->AddSubstring(&buffer[first_char_pos],
+                               kMaxExponentLength - first_char_pos);
+}
+
+
+void DoubleToStringConverter::CreateDecimalRepresentation(
+    const char* decimal_digits,
+    int length,
+    int decimal_point,
+    int digits_after_point,
+    StringBuilder* result_builder) const {
+  // Create a representation that is padded with zeros if needed.
+  if (decimal_point <= 0) {
+      // "0.00000decimal_rep".
+    result_builder->AddCharacter('0');
+    if (digits_after_point > 0) {
+      result_builder->AddCharacter('.');
+      result_builder->AddPadding('0', -decimal_point);
+      ASSERT(length <= digits_after_point - (-decimal_point));
+      result_builder->AddSubstring(decimal_digits, length);
+      int remaining_digits = digits_after_point - (-decimal_point) - length;
+      result_builder->AddPadding('0', remaining_digits);
+    }
+  } else if (decimal_point >= length) {
+    // "decimal_rep0000.00000" or "decimal_rep.0000"
+    result_builder->AddSubstring(decimal_digits, length);
+    result_builder->AddPadding('0', decimal_point - length);
+    if (digits_after_point > 0) {
+      result_builder->AddCharacter('.');
+      result_builder->AddPadding('0', digits_after_point);
+    }
+  } else {
+    // "decima.l_rep000"
+    ASSERT(digits_after_point > 0);
+    result_builder->AddSubstring(decimal_digits, decimal_point);
+    result_builder->AddCharacter('.');
+    ASSERT(length - decimal_point <= digits_after_point);
+    result_builder->AddSubstring(&decimal_digits[decimal_point],
+                                 length - decimal_point);
+    int remaining_digits = digits_after_point - (length - decimal_point);
+    result_builder->AddPadding('0', remaining_digits);
+  }
+  if (digits_after_point == 0) {
+    if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
+      result_builder->AddCharacter('.');
+    }
+    if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
+      result_builder->AddCharacter('0');
+    }
+  }
+}
+
+
+bool DoubleToStringConverter::ToShortestIeeeNumber(
+    double value,
+    StringBuilder* result_builder,
+    DoubleToStringConverter::DtoaMode mode) const {
+  ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  int decimal_point;
+  bool sign;
+  const int kDecimalRepCapacity = kBase10MaximalLength + 1;
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+
+  DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
+                &sign, &decimal_rep_length, &decimal_point);
+
+  bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  int exponent = decimal_point - 1;
+  if ((decimal_in_shortest_low_ <= exponent) &&
+      (exponent < decimal_in_shortest_high_)) {
+    CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
+                                decimal_point,
+                                Max(0, decimal_rep_length - decimal_point),
+                                result_builder);
+  } else {
+    CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
+                                    result_builder);
+  }
+  return true;
+}
+
+
+bool DoubleToStringConverter::ToFixed(double value,
+                                      int requested_digits,
+                                      StringBuilder* result_builder) const {
+  ASSERT(kMaxFixedDigitsBeforePoint == 60);
+  const double kFirstNonFixed = 1e60;
+
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
+  if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
+
+  // Find a sufficiently precise decimal representation of n.
+  int decimal_point;
+  bool sign;
+  // Add space for the '\0' byte.
+  const int kDecimalRepCapacity =
+      kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+  DoubleToAscii(value, FIXED, requested_digits,
+                decimal_rep, kDecimalRepCapacity,
+                &sign, &decimal_rep_length, &decimal_point);
+
+  bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+                              requested_digits, result_builder);
+  return true;
+}
+
+
+bool DoubleToStringConverter::ToExponential(
+    double value,
+    int requested_digits,
+    StringBuilder* result_builder) const {
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  if (requested_digits < -1) return false;
+  if (requested_digits > kMaxExponentialDigits) return false;
+
+  int decimal_point;
+  bool sign;
+  // Add space for digit before the decimal point and the '\0' character.
+  const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
+  ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+
+  if (requested_digits == -1) {
+    DoubleToAscii(value, SHORTEST, 0,
+                  decimal_rep, kDecimalRepCapacity,
+                  &sign, &decimal_rep_length, &decimal_point);
+  } else {
+    DoubleToAscii(value, PRECISION, requested_digits + 1,
+                  decimal_rep, kDecimalRepCapacity,
+                  &sign, &decimal_rep_length, &decimal_point);
+    ASSERT(decimal_rep_length <= requested_digits + 1);
+
+    for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
+      decimal_rep[i] = '0';
+    }
+    decimal_rep_length = requested_digits + 1;
+  }
+
+  bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  int exponent = decimal_point - 1;
+  CreateExponentialRepresentation(decimal_rep,
+                                  decimal_rep_length,
+                                  exponent,
+                                  result_builder);
+  return true;
+}
+
+
+bool DoubleToStringConverter::ToPrecision(double value,
+                                          int precision,
+                                          StringBuilder* result_builder) const {
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
+    return false;
+  }
+
+  // Find a sufficiently precise decimal representation of n.
+  int decimal_point;
+  bool sign;
+  // Add one for the terminating null character.
+  const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+
+  DoubleToAscii(value, PRECISION, precision,
+                decimal_rep, kDecimalRepCapacity,
+                &sign, &decimal_rep_length, &decimal_point);
+  ASSERT(decimal_rep_length <= precision);
+
+  bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  // The exponent if we print the number as x.xxeyyy. That is with the
+  // decimal point after the first digit.
+  int exponent = decimal_point - 1;
+
+  int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
+  if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
+      (decimal_point - precision + extra_zero >
+       max_trailing_padding_zeroes_in_precision_mode_)) {
+    // Fill buffer to contain 'precision' digits.
+    // Usually the buffer is already at the correct length, but 'DoubleToAscii'
+    // is allowed to return less characters.
+    for (int i = decimal_rep_length; i < precision; ++i) {
+      decimal_rep[i] = '0';
+    }
+
+    CreateExponentialRepresentation(decimal_rep,
+                                    precision,
+                                    exponent,
+                                    result_builder);
+  } else {
+    CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+                                Max(0, precision - decimal_point),
+                                result_builder);
+  }
+  return true;
+}
+
+
+static BignumDtoaMode DtoaToBignumDtoaMode(
+    DoubleToStringConverter::DtoaMode dtoa_mode) {
+  switch (dtoa_mode) {
+    case DoubleToStringConverter::SHORTEST:  return BIGNUM_DTOA_SHORTEST;
+    case DoubleToStringConverter::SHORTEST_SINGLE:
+        return BIGNUM_DTOA_SHORTEST_SINGLE;
+    case DoubleToStringConverter::FIXED:     return BIGNUM_DTOA_FIXED;
+    case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
+    default:
+      UNREACHABLE();
+      return BIGNUM_DTOA_SHORTEST;  // To silence compiler.
+  }
+}
+
+
+void DoubleToStringConverter::DoubleToAscii(double v,
+                                            DtoaMode mode,
+                                            int requested_digits,
+                                            char* buffer,
+                                            int buffer_length,
+                                            bool* sign,
+                                            int* length,
+                                            int* point) {
+  Vector<char> vector(buffer, buffer_length);
+  ASSERT(!Double(v).IsSpecial());
+  ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
+
+  if (Double(v).Sign() < 0) {
+    *sign = true;
+    v = -v;
+  } else {
+    *sign = false;
+  }
+
+  if (mode == PRECISION && requested_digits == 0) {
+    vector[0] = '\0';
+    *length = 0;
+    return;
+  }
+
+  if (v == 0) {
+    vector[0] = '0';
+    vector[1] = '\0';
+    *length = 1;
+    *point = 1;
+    return;
+  }
+
+  bool fast_worked;
+  switch (mode) {
+    case SHORTEST:
+      fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
+      break;
+    case SHORTEST_SINGLE:
+      fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
+                             vector, length, point);
+      break;
+    case FIXED:
+      fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
+      break;
+    case PRECISION:
+      fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
+                             vector, length, point);
+      break;
+    default:
+      UNREACHABLE();
+      fast_worked = false;
+  }
+  if (fast_worked) return;
+
+  // If the fast dtoa didn't succeed use the slower bignum version.
+  BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
+  BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
+  vector[*length] = '\0';
+}
+
+
+// Consumes the given substring from the iterator.
+// Returns false, if the substring does not match.
+static bool ConsumeSubString(const char** current,
+                             const char* end,
+                             const char* substring) {
+  ASSERT(**current == *substring);
+  for (substring++; *substring != '\0'; substring++) {
+    ++*current;
+    if (*current == end || **current != *substring) return false;
+  }
+  ++*current;
+  return true;
+}
+
+
+// Maximum number of significant digits in decimal representation.
+// The longest possible double in decimal representation is
+// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
+// (768 digits). If we parse a number whose first digits are equal to a
+// mean of 2 adjacent doubles (that could have up to 769 digits) the result
+// must be rounded to the bigger one unless the tail consists of zeros, so
+// we don't need to preserve all the digits.
+const int kMaxSignificantDigits = 772;
+
+
+// Returns true if a nonspace found and false if the end has reached.
+static inline bool AdvanceToNonspace(const char** current, const char* end) {
+  while (*current != end) {
+    if (**current != ' ') return true;
+    ++*current;
+  }
+  return false;
+}
+
+
+static bool isDigit(int x, int radix) {
+  return (x >= '0' && x <= '9' && x < '0' + radix)
+      || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
+      || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
+}
+
+
+static double SignedZero(bool sign) {
+  return sign ? -0.0 : 0.0;
+}
+
+
+// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
+template <int radix_log_2>
+static double RadixStringToIeee(const char* current,
+                                const char* end,
+                                bool sign,
+                                bool allow_trailing_junk,
+                                double junk_string_value,
+                                bool read_as_double,
+                                const char** trailing_pointer) {
+  ASSERT(current != end);
+
+  const int kDoubleSize = Double::kSignificandSize;
+  const int kSingleSize = Single::kSignificandSize;
+  const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
+
+  // Skip leading 0s.
+  while (*current == '0') {
+    ++current;
+    if (current == end) {
+      *trailing_pointer = end;
+      return SignedZero(sign);
+    }
+  }
+
+  int64_t number = 0;
+  int exponent = 0;
+  const int radix = (1 << radix_log_2);
+
+  do {
+    int digit;
+    if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
+      digit = static_cast<char>(*current) - '0';
+    } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
+      digit = static_cast<char>(*current) - 'a' + 10;
+    } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
+      digit = static_cast<char>(*current) - 'A' + 10;
+    } else {
+      if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
+        break;
+      } else {
+        return junk_string_value;
+      }
+    }
+
+    number = number * radix + digit;
+    int overflow = static_cast<int>(number >> kSignificandSize);
+    if (overflow != 0) {
+      // Overflow occurred. Need to determine which direction to round the
+      // result.
+      int overflow_bits_count = 1;
+      while (overflow > 1) {
+        overflow_bits_count++;
+        overflow >>= 1;
+      }
+
+      int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
+      int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
+      number >>= overflow_bits_count;
+      exponent = overflow_bits_count;
+
+      bool zero_tail = true;
+      while (true) {
+        ++current;
+        if (current == end || !isDigit(*current, radix)) break;
+        zero_tail = zero_tail && *current == '0';
+        exponent += radix_log_2;
+      }
+
+      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+        return junk_string_value;
+      }
+
+      int middle_value = (1 << (overflow_bits_count - 1));
+      if (dropped_bits > middle_value) {
+        number++;  // Rounding up.
+      } else if (dropped_bits == middle_value) {
+        // Rounding to even to consistency with decimals: half-way case rounds
+        // up if significant part is odd and down otherwise.
+        if ((number & 1) != 0 || !zero_tail) {
+          number++;  // Rounding up.
+        }
+      }
+
+      // Rounding up may cause overflow.
+      if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
+        exponent++;
+        number >>= 1;
+      }
+      break;
+    }
+    ++current;
+  } while (current != end);
+
+  ASSERT(number < ((int64_t)1 << kSignificandSize));
+  ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
+
+  *trailing_pointer = current;
+
+  if (exponent == 0) {
+    if (sign) {
+      if (number == 0) return -0.0;
+      number = -number;
+    }
+    return static_cast<double>(number);
+  }
+
+  ASSERT(number != 0);
+  return Double(DiyFp(number, exponent)).value();
+}
+
+
+double StringToDoubleConverter::StringToIeee(
+    const char* input,
+    int length,
+    int* processed_characters_count,
+    bool read_as_double) const {
+  const char* current = input;
+  const char* end = input + length;
+
+  *processed_characters_count = 0;
+
+  const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
+  const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
+  const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
+  const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
+
+  // To make sure that iterator dereferencing is valid the following
+  // convention is used:
+  // 1. Each '++current' statement is followed by check for equality to 'end'.
+  // 2. If AdvanceToNonspace returned false then current == end.
+  // 3. If 'current' becomes equal to 'end' the function returns or goes to
+  // 'parsing_done'.
+  // 4. 'current' is not dereferenced after the 'parsing_done' label.
+  // 5. Code before 'parsing_done' may rely on 'current != end'.
+  if (current == end) return empty_string_value_;
+
+  if (allow_leading_spaces || allow_trailing_spaces) {
+    if (!AdvanceToNonspace(&current, end)) {
+      *processed_characters_count = current - input;
+      return empty_string_value_;
+    }
+    if (!allow_leading_spaces && (input != current)) {
+      // No leading spaces allowed, but AdvanceToNonspace moved forward.
+      return junk_string_value_;
+    }
+  }
+
+  // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
+  const int kBufferSize = kMaxSignificantDigits + 10;
+  char buffer[kBufferSize];  // NOLINT: size is known at compile time.
+  int buffer_pos = 0;
+
+  // Exponent will be adjusted if insignificant digits of the integer part
+  // or insignificant leading zeros of the fractional part are dropped.
+  int exponent = 0;
+  int significant_digits = 0;
+  int insignificant_digits = 0;
+  bool nonzero_digit_dropped = false;
+
+  bool sign = false;
+
+  if (*current == '+' || *current == '-') {
+    sign = (*current == '-');
+    ++current;
+    const char* next_non_space = current;
+    // Skip following spaces (if allowed).
+    if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
+    if (!allow_spaces_after_sign && (current != next_non_space)) {
+      return junk_string_value_;
+    }
+    current = next_non_space;
+  }
+
+  if (infinity_symbol_ != NULL) {
+    if (*current == infinity_symbol_[0]) {
+      if (!ConsumeSubString(&current, end, infinity_symbol_)) {
+        return junk_string_value_;
+      }
+
+      if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+        return junk_string_value_;
+      }
+      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+        return junk_string_value_;
+      }
+
+      ASSERT(buffer_pos == 0);
+      *processed_characters_count = current - input;
+      return sign ? -Double::Infinity() : Double::Infinity();
+    }
+  }
+
+  if (nan_symbol_ != NULL) {
+    if (*current == nan_symbol_[0]) {
+      if (!ConsumeSubString(&current, end, nan_symbol_)) {
+        return junk_string_value_;
+      }
+
+      if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+        return junk_string_value_;
+      }
+      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+        return junk_string_value_;
+      }
+
+      ASSERT(buffer_pos == 0);
+      *processed_characters_count = current - input;
+      return sign ? -Double::NaN() : Double::NaN();
+    }
+  }
+
+  bool leading_zero = false;
+  if (*current == '0') {
+    ++current;
+    if (current == end) {
+      *processed_characters_count = current - input;
+      return SignedZero(sign);
+    }
+
+    leading_zero = true;
+
+    // It could be hexadecimal value.
+    if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
+      ++current;
+      if (current == end || !isDigit(*current, 16)) {
+        return junk_string_value_;  // "0x".
+      }
+
+      const char* tail_pointer = NULL;
+      double result = RadixStringToIeee<4>(current,
+                                           end,
+                                           sign,
+                                           allow_trailing_junk,
+                                           junk_string_value_,
+                                           read_as_double,
+                                           &tail_pointer);
+      if (tail_pointer != NULL) {
+        if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
+        *processed_characters_count = tail_pointer - input;
+      }
+      return result;
+    }
+
+    // Ignore leading zeros in the integer part.
+    while (*current == '0') {
+      ++current;
+      if (current == end) {
+        *processed_characters_count = current - input;
+        return SignedZero(sign);
+      }
+    }
+  }
+
+  bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
+
+  // Copy significant digits of the integer part (if any) to the buffer.
+  while (*current >= '0' && *current <= '9') {
+    if (significant_digits < kMaxSignificantDigits) {
+      ASSERT(buffer_pos < kBufferSize);
+      buffer[buffer_pos++] = static_cast<char>(*current);
+      significant_digits++;
+      // Will later check if it's an octal in the buffer.
+    } else {
+      insignificant_digits++;  // Move the digit into the exponential part.
+      nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+    }
+    octal = octal && *current < '8';
+    ++current;
+    if (current == end) goto parsing_done;
+  }
+
+  if (significant_digits == 0) {
+    octal = false;
+  }
+
+  if (*current == '.') {
+    if (octal && !allow_trailing_junk) return junk_string_value_;
+    if (octal) goto parsing_done;
+
+    ++current;
+    if (current == end) {
+      if (significant_digits == 0 && !leading_zero) {
+        return junk_string_value_;
+      } else {
+        goto parsing_done;
+      }
+    }
+
+    if (significant_digits == 0) {
+      // octal = false;
+      // Integer part consists of 0 or is absent. Significant digits start after
+      // leading zeros (if any).
+      while (*current == '0') {
+        ++current;
+        if (current == end) {
+          *processed_characters_count = current - input;
+          return SignedZero(sign);
+        }
+        exponent--;  // Move this 0 into the exponent.
+      }
+    }
+
+    // There is a fractional part.
+    // We don't emit a '.', but adjust the exponent instead.
+    while (*current >= '0' && *current <= '9') {
+      if (significant_digits < kMaxSignificantDigits) {
+        ASSERT(buffer_pos < kBufferSize);
+        buffer[buffer_pos++] = static_cast<char>(*current);
+        significant_digits++;
+        exponent--;
+      } else {
+        // Ignore insignificant digits in the fractional part.
+        nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+      }
+      ++current;
+      if (current == end) goto parsing_done;
+    }
+  }
+
+  if (!leading_zero && exponent == 0 && significant_digits == 0) {
+    // If leading_zeros is true then the string contains zeros.
+    // If exponent < 0 then string was [+-]\.0*...
+    // If significant_digits != 0 the string is not equal to 0.
+    // Otherwise there are no digits in the string.
+    return junk_string_value_;
+  }
+
+  // Parse exponential part.
+  if (*current == 'e' || *current == 'E') {
+    if (octal && !allow_trailing_junk) return junk_string_value_;
+    if (octal) goto parsing_done;
+    ++current;
+    if (current == end) {
+      if (allow_trailing_junk) {
+        goto parsing_done;
+      } else {
+        return junk_string_value_;
+      }
+    }
+    char sign = '+';
+    if (*current == '+' || *current == '-') {
+      sign = static_cast<char>(*current);
+      ++current;
+      if (current == end) {
+        if (allow_trailing_junk) {
+          goto parsing_done;
+        } else {
+          return junk_string_value_;
+        }
+      }
+    }
+
+    if (current == end || *current < '0' || *current > '9') {
+      if (allow_trailing_junk) {
+        goto parsing_done;
+      } else {
+        return junk_string_value_;
+      }
+    }
+
+    const int max_exponent = INT_MAX / 2;
+    ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
+    int num = 0;
+    do {
+      // Check overflow.
+      int digit = *current - '0';
+      if (num >= max_exponent / 10
+          && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
+        num = max_exponent;
+      } else {
+        num = num * 10 + digit;
+      }
+      ++current;
+    } while (current != end && *current >= '0' && *current <= '9');
+
+    exponent += (sign == '-' ? -num : num);
+  }
+
+  if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+    return junk_string_value_;
+  }
+  if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+    return junk_string_value_;
+  }
+  if (allow_trailing_spaces) {
+    AdvanceToNonspace(&current, end);
+  }
+
+  parsing_done:
+  exponent += insignificant_digits;
+
+  if (octal) {
+    double result;
+    const char* tail_pointer = NULL;
+    result = RadixStringToIeee<3>(buffer,
+                                  buffer + buffer_pos,
+                                  sign,
+                                  allow_trailing_junk,
+                                  junk_string_value_,
+                                  read_as_double,
+                                  &tail_pointer);
+    ASSERT(tail_pointer != NULL);
+    *processed_characters_count = current - input;
+    return result;
+  }
+
+  if (nonzero_digit_dropped) {
+    buffer[buffer_pos++] = '1';
+    exponent--;
+  }
+
+  ASSERT(buffer_pos < kBufferSize);
+  buffer[buffer_pos] = '\0';
+
+  double converted;
+  if (read_as_double) {
+    converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
+  } else {
+    converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
+  }
+  *processed_characters_count = current - input;
+  return sign? -converted: converted;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/double-conversion.h b/klm/util/double-conversion/double-conversion.h
new file mode 100644
index 00000000..1c3387d4
--- /dev/null
+++ b/klm/util/double-conversion/double-conversion.h
@@ -0,0 +1,536 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+class DoubleToStringConverter {
+ public:
+  // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint
+  // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the
+  // function returns false.
+  static const int kMaxFixedDigitsBeforePoint = 60;
+  static const int kMaxFixedDigitsAfterPoint = 60;
+
+  // When calling ToExponential with a requested_digits
+  // parameter > kMaxExponentialDigits then the function returns false.
+  static const int kMaxExponentialDigits = 120;
+
+  // When calling ToPrecision with a requested_digits
+  // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits
+  // then the function returns false.
+  static const int kMinPrecisionDigits = 1;
+  static const int kMaxPrecisionDigits = 120;
+
+  enum Flags {
+    NO_FLAGS = 0,
+    EMIT_POSITIVE_EXPONENT_SIGN = 1,
+    EMIT_TRAILING_DECIMAL_POINT = 2,
+    EMIT_TRAILING_ZERO_AFTER_POINT = 4,
+    UNIQUE_ZERO = 8
+  };
+
+  // Flags should be a bit-or combination of the possible Flags-enum.
+  //  - NO_FLAGS: no special flags.
+  //  - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent
+  //    form, emits a '+' for positive exponents. Example: 1.2e+2.
+  //  - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is
+  //    converted into decimal format then a trailing decimal point is appended.
+  //    Example: 2345.0 is converted to "2345.".
+  //  - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point
+  //    emits a trailing '0'-character. This flag requires the
+  //    EXMIT_TRAILING_DECIMAL_POINT flag.
+  //    Example: 2345.0 is converted to "2345.0".
+  //  - UNIQUE_ZERO: "-0.0" is converted to "0.0".
+  //
+  // Infinity symbol and nan_symbol provide the string representation for these
+  // special values. If the string is NULL and the special value is encountered
+  // then the conversion functions return false.
+  //
+  // The exponent_character is used in exponential representations. It is
+  // usually 'e' or 'E'.
+  //
+  // When converting to the shortest representation the converter will
+  // represent input numbers in decimal format if they are in the interval
+  // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[
+  //    (lower boundary included, greater boundary excluded).
+  // Example: with decimal_in_shortest_low = -6 and
+  //               decimal_in_shortest_high = 21:
+  //   ToShortest(0.000001)  -> "0.000001"
+  //   ToShortest(0.0000001) -> "1e-7"
+  //   ToShortest(111111111111111111111.0)  -> "111111111111111110000"
+  //   ToShortest(100000000000000000000.0)  -> "100000000000000000000"
+  //   ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
+  //
+  // When converting to precision mode the converter may add
+  // max_leading_padding_zeroes before returning the number in exponential
+  // format.
+  // Example with max_leading_padding_zeroes_in_precision_mode = 6.
+  //   ToPrecision(0.0000012345, 2) -> "0.0000012"
+  //   ToPrecision(0.00000012345, 2) -> "1.2e-7"
+  // Similarily the converter may add up to
+  // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
+  // returning an exponential representation. A zero added by the
+  // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
+  // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
+  //   ToPrecision(230.0, 2) -> "230"
+  //   ToPrecision(230.0, 2) -> "230."  with EMIT_TRAILING_DECIMAL_POINT.
+  //   ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+  DoubleToStringConverter(int flags,
+                          const char* infinity_symbol,
+                          const char* nan_symbol,
+                          char exponent_character,
+                          int decimal_in_shortest_low,
+                          int decimal_in_shortest_high,
+                          int max_leading_padding_zeroes_in_precision_mode,
+                          int max_trailing_padding_zeroes_in_precision_mode)
+      : flags_(flags),
+        infinity_symbol_(infinity_symbol),
+        nan_symbol_(nan_symbol),
+        exponent_character_(exponent_character),
+        decimal_in_shortest_low_(decimal_in_shortest_low),
+        decimal_in_shortest_high_(decimal_in_shortest_high),
+        max_leading_padding_zeroes_in_precision_mode_(
+            max_leading_padding_zeroes_in_precision_mode),
+        max_trailing_padding_zeroes_in_precision_mode_(
+            max_trailing_padding_zeroes_in_precision_mode) {
+    // When 'trailing zero after the point' is set, then 'trailing point'
+    // must be set too.
+    ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) ||
+        !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0));
+  }
+
+  // Returns a converter following the EcmaScript specification.
+  static const DoubleToStringConverter& EcmaScriptConverter();
+
+  // Computes the shortest string of digits that correctly represent the input
+  // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high
+  // (see constructor) it then either returns a decimal representation, or an
+  // exponential representation.
+  // Example with decimal_in_shortest_low = -6,
+  //              decimal_in_shortest_high = 21,
+  //              EMIT_POSITIVE_EXPONENT_SIGN activated, and
+  //              EMIT_TRAILING_DECIMAL_POINT deactived:
+  //   ToShortest(0.000001)  -> "0.000001"
+  //   ToShortest(0.0000001) -> "1e-7"
+  //   ToShortest(111111111111111111111.0)  -> "111111111111111110000"
+  //   ToShortest(100000000000000000000.0)  -> "100000000000000000000"
+  //   ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
+  //
+  // Note: the conversion may round the output if the returned string
+  // is accurate enough to uniquely identify the input-number.
+  // For example the most precise representation of the double 9e59 equals
+  // "899999999999999918767229449717619953810131273674690656206848", but
+  // the converter will return the shorter (but still correct) "9e59".
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except when the input value is special and no infinity_symbol or
+  // nan_symbol has been given to the constructor.
+  bool ToShortest(double value, StringBuilder* result_builder) const {
+    return ToShortestIeeeNumber(value, result_builder, SHORTEST);
+  }
+
+  // Same as ToShortest, but for single-precision floats.
+  bool ToShortestSingle(float value, StringBuilder* result_builder) const {
+    return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE);
+  }
+
+
+  // Computes a decimal representation with a fixed number of digits after the
+  // decimal point. The last emitted digit is rounded.
+  //
+  // Examples:
+  //   ToFixed(3.12, 1) -> "3.1"
+  //   ToFixed(3.1415, 3) -> "3.142"
+  //   ToFixed(1234.56789, 4) -> "1234.5679"
+  //   ToFixed(1.23, 5) -> "1.23000"
+  //   ToFixed(0.1, 4) -> "0.1000"
+  //   ToFixed(1e30, 2) -> "1000000000000000019884624838656.00"
+  //   ToFixed(0.1, 30) -> "0.100000000000000005551115123126"
+  //   ToFixed(0.1, 17) -> "0.10000000000000001"
+  //
+  // If requested_digits equals 0, then the tail of the result depends on
+  // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT.
+  // Examples, for requested_digits == 0,
+  //   let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be
+  //    - false and false: then 123.45 -> 123
+  //                             0.678 -> 1
+  //    - true and false: then 123.45 -> 123.
+  //                            0.678 -> 1.
+  //    - true and true: then 123.45 -> 123.0
+  //                           0.678 -> 1.0
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except for the following cases:
+  //   - the input value is special and no infinity_symbol or nan_symbol has
+  //     been provided to the constructor,
+  //   - 'value' > 10^kMaxFixedDigitsBeforePoint, or
+  //   - 'requested_digits' > kMaxFixedDigitsAfterPoint.
+  // The last two conditions imply that the result will never contain more than
+  // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
+  // (one additional character for the sign, and one for the decimal point).
+  bool ToFixed(double value,
+               int requested_digits,
+               StringBuilder* result_builder) const;
+
+  // Computes a representation in exponential format with requested_digits
+  // after the decimal point. The last emitted digit is rounded.
+  // If requested_digits equals -1, then the shortest exponential representation
+  // is computed.
+  //
+  // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and
+  //               exponent_character set to 'e'.
+  //   ToExponential(3.12, 1) -> "3.1e0"
+  //   ToExponential(5.0, 3) -> "5.000e0"
+  //   ToExponential(0.001, 2) -> "1.00e-3"
+  //   ToExponential(3.1415, -1) -> "3.1415e0"
+  //   ToExponential(3.1415, 4) -> "3.1415e0"
+  //   ToExponential(3.1415, 3) -> "3.142e0"
+  //   ToExponential(123456789000000, 3) -> "1.235e14"
+  //   ToExponential(1000000000000000019884624838656.0, -1) -> "1e30"
+  //   ToExponential(1000000000000000019884624838656.0, 32) ->
+  //                     "1.00000000000000001988462483865600e30"
+  //   ToExponential(1234, 0) -> "1e3"
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except for the following cases:
+  //   - the input value is special and no infinity_symbol or nan_symbol has
+  //     been provided to the constructor,
+  //   - 'requested_digits' > kMaxExponentialDigits.
+  // The last condition implies that the result will never contain more than
+  // kMaxExponentialDigits + 8 characters (the sign, the digit before the
+  // decimal point, the decimal point, the exponent character, the
+  // exponent's sign, and at most 3 exponent digits).
+  bool ToExponential(double value,
+                     int requested_digits,
+                     StringBuilder* result_builder) const;
+
+  // Computes 'precision' leading digits of the given 'value' and returns them
+  // either in exponential or decimal format, depending on
+  // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the
+  // constructor).
+  // The last computed digit is rounded.
+  //
+  // Example with max_leading_padding_zeroes_in_precision_mode = 6.
+  //   ToPrecision(0.0000012345, 2) -> "0.0000012"
+  //   ToPrecision(0.00000012345, 2) -> "1.2e-7"
+  // Similarily the converter may add up to
+  // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
+  // returning an exponential representation. A zero added by the
+  // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
+  // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
+  //   ToPrecision(230.0, 2) -> "230"
+  //   ToPrecision(230.0, 2) -> "230."  with EMIT_TRAILING_DECIMAL_POINT.
+  //   ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+  // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no
+  //    EMIT_TRAILING_ZERO_AFTER_POINT:
+  //   ToPrecision(123450.0, 6) -> "123450"
+  //   ToPrecision(123450.0, 5) -> "123450"
+  //   ToPrecision(123450.0, 4) -> "123500"
+  //   ToPrecision(123450.0, 3) -> "123000"
+  //   ToPrecision(123450.0, 2) -> "1.2e5"
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except for the following cases:
+  //   - the input value is special and no infinity_symbol or nan_symbol has
+  //     been provided to the constructor,
+  //   - precision < kMinPericisionDigits
+  //   - precision > kMaxPrecisionDigits
+  // The last condition implies that the result will never contain more than
+  // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the
+  // exponent character, the exponent's sign, and at most 3 exponent digits).
+  bool ToPrecision(double value,
+                   int precision,
+                   StringBuilder* result_builder) const;
+
+  enum DtoaMode {
+    // Produce the shortest correct representation.
+    // For example the output of 0.299999999999999988897 is (the less accurate
+    // but correct) 0.3.
+    SHORTEST,
+    // Same as SHORTEST, but for single-precision floats.
+    SHORTEST_SINGLE,
+    // Produce a fixed number of digits after the decimal point.
+    // For instance fixed(0.1, 4) becomes 0.1000
+    // If the input number is big, the output will be big.
+    FIXED,
+    // Fixed number of digits (independent of the decimal point).
+    PRECISION
+  };
+
+  // The maximal number of digits that are needed to emit a double in base 10.
+  // A higher precision can be achieved by using more digits, but the shortest
+  // accurate representation of any double will never use more digits than
+  // kBase10MaximalLength.
+  // Note that DoubleToAscii null-terminates its input. So the given buffer
+  // should be at least kBase10MaximalLength + 1 characters long.
+  static const int kBase10MaximalLength = 17;
+
+  // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or
+  // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v'
+  // after it has been casted to a single-precision float. That is, in this
+  // mode static_cast<float>(v) must not be NaN, +Infinity or -Infinity.
+  //
+  // The result should be interpreted as buffer * 10^(point-length).
+  //
+  // The output depends on the given mode:
+  //  - SHORTEST: produce the least amount of digits for which the internal
+  //   identity requirement is still satisfied. If the digits are printed
+  //   (together with the correct exponent) then reading this number will give
+  //   'v' again. The buffer will choose the representation that is closest to
+  //   'v'. If there are two at the same distance, than the one farther away
+  //   from 0 is chosen (halfway cases - ending with 5 - are rounded up).
+  //   In this mode the 'requested_digits' parameter is ignored.
+  //  - SHORTEST_SINGLE: same as SHORTEST but with single-precision.
+  //  - FIXED: produces digits necessary to print a given number with
+  //   'requested_digits' digits after the decimal point. The produced digits
+  //   might be too short in which case the caller has to fill the remainder
+  //   with '0's.
+  //   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
+  //   Halfway cases are rounded towards +/-Infinity (away from 0). The call
+  //   toFixed(0.15, 2) thus returns buffer="2", point=0.
+  //   The returned buffer may contain digits that would be truncated from the
+  //   shortest representation of the input.
+  //  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
+  //   Even though the length of produced digits usually equals
+  //   'requested_digits', the function is allowed to return fewer digits, in
+  //   which case the caller has to fill the missing digits with '0's.
+  //   Halfway cases are again rounded away from 0.
+  // DoubleToAscii expects the given buffer to be big enough to hold all
+  // digits and a terminating null-character. In SHORTEST-mode it expects a
+  // buffer of at least kBase10MaximalLength + 1. In all other modes the
+  // requested_digits parameter and the padding-zeroes limit the size of the
+  // output. Don't forget the decimal point, the exponent character and the
+  // terminating null-character when computing the maximal output size.
+  // The given length is only used in debug mode to ensure the buffer is big
+  // enough.
+  static void DoubleToAscii(double v,
+                            DtoaMode mode,
+                            int requested_digits,
+                            char* buffer,
+                            int buffer_length,
+                            bool* sign,
+                            int* length,
+                            int* point);
+
+ private:
+  // Implementation for ToShortest and ToShortestSingle.
+  bool ToShortestIeeeNumber(double value,
+                            StringBuilder* result_builder,
+                            DtoaMode mode) const;
+
+  // If the value is a special value (NaN or Infinity) constructs the
+  // corresponding string using the configured infinity/nan-symbol.
+  // If either of them is NULL or the value is not special then the
+  // function returns false.
+  bool HandleSpecialValues(double value, StringBuilder* result_builder) const;
+  // Constructs an exponential representation (i.e. 1.234e56).
+  // The given exponent assumes a decimal point after the first decimal digit.
+  void CreateExponentialRepresentation(const char* decimal_digits,
+                                       int length,
+                                       int exponent,
+                                       StringBuilder* result_builder) const;
+  // Creates a decimal representation (i.e 1234.5678).
+  void CreateDecimalRepresentation(const char* decimal_digits,
+                                   int length,
+                                   int decimal_point,
+                                   int digits_after_point,
+                                   StringBuilder* result_builder) const;
+
+  const int flags_;
+  const char* const infinity_symbol_;
+  const char* const nan_symbol_;
+  const char exponent_character_;
+  const int decimal_in_shortest_low_;
+  const int decimal_in_shortest_high_;
+  const int max_leading_padding_zeroes_in_precision_mode_;
+  const int max_trailing_padding_zeroes_in_precision_mode_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter);
+};
+
+
+class StringToDoubleConverter {
+ public:
+  // Enumeration for allowing octals and ignoring junk when converting
+  // strings to numbers.
+  enum Flags {
+    NO_FLAGS = 0,
+    ALLOW_HEX = 1,
+    ALLOW_OCTALS = 2,
+    ALLOW_TRAILING_JUNK = 4,
+    ALLOW_LEADING_SPACES = 8,
+    ALLOW_TRAILING_SPACES = 16,
+    ALLOW_SPACES_AFTER_SIGN = 32
+  };
+
+  // Flags should be a bit-or combination of the possible Flags-enum.
+  //  - NO_FLAGS: no special flags.
+  //  - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers.
+  //      Ex: StringToDouble("0x1234") -> 4660.0
+  //          In StringToDouble("0x1234.56") the characters ".56" are trailing
+  //          junk. The result of the call is hence dependent on
+  //          the ALLOW_TRAILING_JUNK flag and/or the junk value.
+  //      With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK,
+  //      the string will not be parsed as "0" followed by junk.
+  //
+  //  - ALLOW_OCTALS: recognizes the prefix "0" for octals:
+  //      If a sequence of octal digits starts with '0', then the number is
+  //      read as octal integer. Octal numbers may only be integers.
+  //      Ex: StringToDouble("01234") -> 668.0
+  //          StringToDouble("012349") -> 12349.0  // Not a sequence of octal
+  //                                               // digits.
+  //          In StringToDouble("01234.56") the characters ".56" are trailing
+  //          junk. The result of the call is hence dependent on
+  //          the ALLOW_TRAILING_JUNK flag and/or the junk value.
+  //          In StringToDouble("01234e56") the characters "e56" are trailing
+  //          junk, too.
+  //  - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
+  //      a double literal.
+  //  - ALLOW_LEADING_SPACES: skip over leading spaces.
+  //  - ALLOW_TRAILING_SPACES: ignore trailing spaces.
+  //  - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign.
+  //       Ex: StringToDouble("-   123.2") -> -123.2.
+  //           StringToDouble("+   123.2") -> 123.2
+  //
+  // empty_string_value is returned when an empty string is given as input.
+  // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string
+  // containing only spaces is converted to the 'empty_string_value', too.
+  //
+  // junk_string_value is returned when
+  //  a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not
+  //     part of a double-literal) is found.
+  //  b) ALLOW_TRAILING_JUNK is set, but the string does not start with a
+  //     double literal.
+  //
+  // infinity_symbol and nan_symbol are strings that are used to detect
+  // inputs that represent infinity and NaN. They can be null, in which case
+  // they are ignored.
+  // The conversion routine first reads any possible signs. Then it compares the
+  // following character of the input-string with the first character of
+  // the infinity, and nan-symbol. If either matches, the function assumes, that
+  // a match has been found, and expects the following input characters to match
+  // the remaining characters of the special-value symbol.
+  // This means that the following restrictions apply to special-value symbols:
+  //  - they must not start with signs ('+', or '-'),
+  //  - they must not have the same first character.
+  //  - they must not start with digits.
+  //
+  // Examples:
+  //  flags = ALLOW_HEX | ALLOW_TRAILING_JUNK,
+  //  empty_string_value = 0.0,
+  //  junk_string_value = NaN,
+  //  infinity_symbol = "infinity",
+  //  nan_symbol = "nan":
+  //    StringToDouble("0x1234") -> 4660.0.
+  //    StringToDouble("0x1234K") -> 4660.0.
+  //    StringToDouble("") -> 0.0  // empty_string_value.
+  //    StringToDouble(" ") -> NaN  // junk_string_value.
+  //    StringToDouble(" 1") -> NaN  // junk_string_value.
+  //    StringToDouble("0x") -> NaN  // junk_string_value.
+  //    StringToDouble("-123.45") -> -123.45.
+  //    StringToDouble("--123.45") -> NaN  // junk_string_value.
+  //    StringToDouble("123e45") -> 123e45.
+  //    StringToDouble("123E45") -> 123e45.
+  //    StringToDouble("123e+45") -> 123e45.
+  //    StringToDouble("123E-45") -> 123e-45.
+  //    StringToDouble("123e") -> 123.0  // trailing junk ignored.
+  //    StringToDouble("123e-") -> 123.0  // trailing junk ignored.
+  //    StringToDouble("+NaN") -> NaN  // NaN string literal.
+  //    StringToDouble("-infinity") -> -inf.  // infinity literal.
+  //    StringToDouble("Infinity") -> NaN  // junk_string_value.
+  //
+  //  flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES,
+  //  empty_string_value = 0.0,
+  //  junk_string_value = NaN,
+  //  infinity_symbol = NULL,
+  //  nan_symbol = NULL:
+  //    StringToDouble("0x1234") -> NaN  // junk_string_value.
+  //    StringToDouble("01234") -> 668.0.
+  //    StringToDouble("") -> 0.0  // empty_string_value.
+  //    StringToDouble(" ") -> 0.0  // empty_string_value.
+  //    StringToDouble(" 1") -> 1.0
+  //    StringToDouble("0x") -> NaN  // junk_string_value.
+  //    StringToDouble("0123e45") -> NaN  // junk_string_value.
+  //    StringToDouble("01239E45") -> 1239e45.
+  //    StringToDouble("-infinity") -> NaN  // junk_string_value.
+  //    StringToDouble("NaN") -> NaN  // junk_string_value.
+  StringToDoubleConverter(int flags,
+                          double empty_string_value,
+                          double junk_string_value,
+                          const char* infinity_symbol,
+                          const char* nan_symbol)
+      : flags_(flags),
+        empty_string_value_(empty_string_value),
+        junk_string_value_(junk_string_value),
+        infinity_symbol_(infinity_symbol),
+        nan_symbol_(nan_symbol) {
+  }
+
+  // Performs the conversion.
+  // The output parameter 'processed_characters_count' is set to the number
+  // of characters that have been processed to read the number.
+  // Spaces than are processed with ALLOW_{LEADING|TRAILING}_SPACES are included
+  // in the 'processed_characters_count'. Trailing junk is never included.
+  double StringToDouble(const char* buffer,
+                        int length,
+                        int* processed_characters_count) const {
+    return StringToIeee(buffer, length, processed_characters_count, true);
+  }
+
+  // Same as StringToDouble but reads a float.
+  // Note that this is not equivalent to static_cast<float>(StringToDouble(...))
+  // due to potential double-rounding.
+  float StringToFloat(const char* buffer,
+                      int length,
+                      int* processed_characters_count) const {
+    return static_cast<float>(StringToIeee(buffer, length,
+                                           processed_characters_count, false));
+  }
+
+ private:
+  const int flags_;
+  const double empty_string_value_;
+  const double junk_string_value_;
+  const char* const infinity_symbol_;
+  const char* const nan_symbol_;
+
+  double StringToIeee(const char* buffer,
+                      int length,
+                      int* processed_characters_count,
+                      bool read_as_double) const;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
diff --git a/klm/util/double-conversion/fast-dtoa.cc b/klm/util/double-conversion/fast-dtoa.cc
new file mode 100644
index 00000000..1a0f8235
--- /dev/null
+++ b/klm/util/double-conversion/fast-dtoa.cc
@@ -0,0 +1,664 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "fast-dtoa.h"
+
+#include "cached-powers.h"
+#include "diy-fp.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// The minimal and maximal target exponent define the range of w's binary
+// exponent, where 'w' is the result of multiplying the input by a cached power
+// of ten.
+//
+// A different range might be chosen on a different platform, to optimize digit
+// generation, but a smaller range requires more powers of ten to be cached.
+static const int kMinimalTargetExponent = -60;
+static const int kMaximalTargetExponent = -32;
+
+
+// Adjusts the last digit of the generated number, and screens out generated
+// solutions that may be inaccurate. A solution may be inaccurate if it is
+// outside the safe interval, or if we cannot prove that it is closer to the
+// input than a neighboring representation of the same length.
+//
+// Input: * buffer containing the digits of too_high / 10^kappa
+//        * the buffer's length
+//        * distance_too_high_w == (too_high - w).f() * unit
+//        * unsafe_interval == (too_high - too_low).f() * unit
+//        * rest = (too_high - buffer * 10^kappa).f() * unit
+//        * ten_kappa = 10^kappa * unit
+//        * unit = the common multiplier
+// Output: returns true if the buffer is guaranteed to contain the closest
+//    representable number to the input.
+//  Modifies the generated digits in the buffer to approach (round towards) w.
+static bool RoundWeed(Vector<char> buffer,
+                      int length,
+                      uint64_t distance_too_high_w,
+                      uint64_t unsafe_interval,
+                      uint64_t rest,
+                      uint64_t ten_kappa,
+                      uint64_t unit) {
+  uint64_t small_distance = distance_too_high_w - unit;
+  uint64_t big_distance = distance_too_high_w + unit;
+  // Let w_low  = too_high - big_distance, and
+  //     w_high = too_high - small_distance.
+  // Note: w_low < w < w_high
+  //
+  // The real w (* unit) must lie somewhere inside the interval
+  // ]w_low; w_high[ (often written as "(w_low; w_high)")
+
+  // Basically the buffer currently contains a number in the unsafe interval
+  // ]too_low; too_high[ with too_low < w < too_high
+  //
+  //  too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  //                     ^v 1 unit            ^      ^                 ^      ^
+  //  boundary_high ---------------------     .      .                 .      .
+  //                     ^v 1 unit            .      .                 .      .
+  //   - - - - - - - - - - - - - - - - - - -  +  - - + - - - - - -     .      .
+  //                                          .      .         ^       .      .
+  //                                          .  big_distance  .       .      .
+  //                                          .      .         .       .    rest
+  //                              small_distance     .         .       .      .
+  //                                          v      .         .       .      .
+  //  w_high - - - - - - - - - - - - - - - - - -     .         .       .      .
+  //                     ^v 1 unit                   .         .       .      .
+  //  w ----------------------------------------     .         .       .      .
+  //                     ^v 1 unit                   v         .       .      .
+  //  w_low  - - - - - - - - - - - - - - - - - - - - -         .       .      .
+  //                                                           .       .      v
+  //  buffer --------------------------------------------------+-------+--------
+  //                                                           .       .
+  //                                                  safe_interval    .
+  //                                                           v       .
+  //   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -     .
+  //                     ^v 1 unit                                     .
+  //  boundary_low -------------------------                     unsafe_interval
+  //                     ^v 1 unit                                     v
+  //  too_low  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  //
+  //
+  // Note that the value of buffer could lie anywhere inside the range too_low
+  // to too_high.
+  //
+  // boundary_low, boundary_high and w are approximations of the real boundaries
+  // and v (the input number). They are guaranteed to be precise up to one unit.
+  // In fact the error is guaranteed to be strictly less than one unit.
+  //
+  // Anything that lies outside the unsafe interval is guaranteed not to round
+  // to v when read again.
+  // Anything that lies inside the safe interval is guaranteed to round to v
+  // when read again.
+  // If the number inside the buffer lies inside the unsafe interval but not
+  // inside the safe interval then we simply do not know and bail out (returning
+  // false).
+  //
+  // Similarly we have to take into account the imprecision of 'w' when finding
+  // the closest representation of 'w'. If we have two potential
+  // representations, and one is closer to both w_low and w_high, then we know
+  // it is closer to the actual value v.
+  //
+  // By generating the digits of too_high we got the largest (closest to
+  // too_high) buffer that is still in the unsafe interval. In the case where
+  // w_high < buffer < too_high we try to decrement the buffer.
+  // This way the buffer approaches (rounds towards) w.
+  // There are 3 conditions that stop the decrementation process:
+  //   1) the buffer is already below w_high
+  //   2) decrementing the buffer would make it leave the unsafe interval
+  //   3) decrementing the buffer would yield a number below w_high and farther
+  //      away than the current number. In other words:
+  //              (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high
+  // Instead of using the buffer directly we use its distance to too_high.
+  // Conceptually rest ~= too_high - buffer
+  // We need to do the following tests in this order to avoid over- and
+  // underflows.
+  ASSERT(rest <= unsafe_interval);
+  while (rest < small_distance &&  // Negated condition 1
+         unsafe_interval - rest >= ten_kappa &&  // Negated condition 2
+         (rest + ten_kappa < small_distance ||  // buffer{-1} > w_high
+          small_distance - rest >= rest + ten_kappa - small_distance)) {
+    buffer[length - 1]--;
+    rest += ten_kappa;
+  }
+
+  // We have approached w+ as much as possible. We now test if approaching w-
+  // would require changing the buffer. If yes, then we have two possible
+  // representations close to w, but we cannot decide which one is closer.
+  if (rest < big_distance &&
+      unsafe_interval - rest >= ten_kappa &&
+      (rest + ten_kappa < big_distance ||
+       big_distance - rest > rest + ten_kappa - big_distance)) {
+    return false;
+  }
+
+  // Weeding test.
+  //   The safe interval is [too_low + 2 ulp; too_high - 2 ulp]
+  //   Since too_low = too_high - unsafe_interval this is equivalent to
+  //      [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp]
+  //   Conceptually we have: rest ~= too_high - buffer
+  return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit);
+}
+
+
+// Rounds the buffer upwards if the result is closer to v by possibly adding
+// 1 to the buffer. If the precision of the calculation is not sufficient to
+// round correctly, return false.
+// The rounding might shift the whole buffer in which case the kappa is
+// adjusted. For example "99", kappa = 3 might become "10", kappa = 4.
+//
+// If 2*rest > ten_kappa then the buffer needs to be round up.
+// rest can have an error of +/- 1 unit. This function accounts for the
+// imprecision and returns false, if the rounding direction cannot be
+// unambiguously determined.
+//
+// Precondition: rest < ten_kappa.
+static bool RoundWeedCounted(Vector<char> buffer,
+                             int length,
+                             uint64_t rest,
+                             uint64_t ten_kappa,
+                             uint64_t unit,
+                             int* kappa) {
+  ASSERT(rest < ten_kappa);
+  // The following tests are done in a specific order to avoid overflows. They
+  // will work correctly with any uint64 values of rest < ten_kappa and unit.
+  //
+  // If the unit is too big, then we don't know which way to round. For example
+  // a unit of 50 means that the real number lies within rest +/- 50. If
+  // 10^kappa == 40 then there is no way to tell which way to round.
+  if (unit >= ten_kappa) return false;
+  // Even if unit is just half the size of 10^kappa we are already completely
+  // lost. (And after the previous test we know that the expression will not
+  // over/underflow.)
+  if (ten_kappa - unit <= unit) return false;
+  // If 2 * (rest + unit) <= 10^kappa we can safely round down.
+  if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) {
+    return true;
+  }
+  // If 2 * (rest - unit) >= 10^kappa, then we can safely round up.
+  if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) {
+    // Increment the last digit recursively until we find a non '9' digit.
+    buffer[length - 1]++;
+    for (int i = length - 1; i > 0; --i) {
+      if (buffer[i] != '0' + 10) break;
+      buffer[i] = '0';
+      buffer[i - 1]++;
+    }
+    // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the
+    // exception of the first digit all digits are now '0'. Simply switch the
+    // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and
+    // the power (the kappa) is increased.
+    if (buffer[0] == '0' + 10) {
+      buffer[0] = '1';
+      (*kappa) += 1;
+    }
+    return true;
+  }
+  return false;
+}
+
+// Returns the biggest power of ten that is less than or equal to the given
+// number. We furthermore receive the maximum number of bits 'number' has.
+//
+// Returns power == 10^(exponent_plus_one-1) such that
+//    power <= number < power * 10.
+// If number_bits == 0 then 0^(0-1) is returned.
+// The number of bits must be <= 32.
+// Precondition: number < (1 << (number_bits + 1)).
+
+// Inspired by the method for finding an integer log base 10 from here:
+// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+static unsigned int const kSmallPowersOfTen[] =
+    {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+     1000000000};
+
+static void BiggestPowerTen(uint32_t number,
+                            int number_bits,
+                            uint32_t* power,
+                            int* exponent_plus_one) {
+  ASSERT(number < (1u << (number_bits + 1)));
+  // 1233/4096 is approximately 1/lg(10).
+  int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12);
+  // We increment to skip over the first entry in the kPowersOf10 table.
+  // Note: kPowersOf10[i] == 10^(i-1).
+  exponent_plus_one_guess++;
+  // We don't have any guarantees that 2^number_bits <= number.
+  // TODO(floitsch): can we change the 'while' into an 'if'? We definitely see
+  // number < (2^number_bits - 1), but I haven't encountered
+  // number < (2^number_bits - 2) yet.
+  while (number < kSmallPowersOfTen[exponent_plus_one_guess]) {
+    exponent_plus_one_guess--;
+  }
+  *power = kSmallPowersOfTen[exponent_plus_one_guess];
+  *exponent_plus_one = exponent_plus_one_guess;
+}
+
+// Generates the digits of input number w.
+// w is a floating-point number (DiyFp), consisting of a significand and an
+// exponent. Its exponent is bounded by kMinimalTargetExponent and
+// kMaximalTargetExponent.
+//       Hence -60 <= w.e() <= -32.
+//
+// Returns false if it fails, in which case the generated digits in the buffer
+// should not be used.
+// Preconditions:
+//  * low, w and high are correct up to 1 ulp (unit in the last place). That
+//    is, their error must be less than a unit of their last digits.
+//  * low.e() == w.e() == high.e()
+//  * low < w < high, and taking into account their error: low~ <= high~
+//  * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
+// Postconditions: returns false if procedure fails.
+//   otherwise:
+//     * buffer is not null-terminated, but len contains the number of digits.
+//     * buffer contains the shortest possible decimal digit-sequence
+//       such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the
+//       correct values of low and high (without their error).
+//     * if more than one decimal representation gives the minimal number of
+//       decimal digits then the one closest to W (where W is the correct value
+//       of w) is chosen.
+// Remark: this procedure takes into account the imprecision of its input
+//   numbers. If the precision is not enough to guarantee all the postconditions
+//   then false is returned. This usually happens rarely (~0.5%).
+//
+// Say, for the sake of example, that
+//   w.e() == -48, and w.f() == 0x1234567890abcdef
+// w's value can be computed by w.f() * 2^w.e()
+// We can obtain w's integral digits by simply shifting w.f() by -w.e().
+//  -> w's integral part is 0x1234
+//  w's fractional part is therefore 0x567890abcdef.
+// Printing w's integral part is easy (simply print 0x1234 in decimal).
+// In order to print its fraction we repeatedly multiply the fraction by 10 and
+// get each digit. Example the first digit after the point would be computed by
+//   (0x567890abcdef * 10) >> 48. -> 3
+// The whole thing becomes slightly more complicated because we want to stop
+// once we have enough digits. That is, once the digits inside the buffer
+// represent 'w' we can stop. Everything inside the interval low - high
+// represents w. However we have to pay attention to low, high and w's
+// imprecision.
+static bool DigitGen(DiyFp low,
+                     DiyFp w,
+                     DiyFp high,
+                     Vector<char> buffer,
+                     int* length,
+                     int* kappa) {
+  ASSERT(low.e() == w.e() && w.e() == high.e());
+  ASSERT(low.f() + 1 <= high.f() - 1);
+  ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
+  // low, w and high are imprecise, but by less than one ulp (unit in the last
+  // place).
+  // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that
+  // the new numbers are outside of the interval we want the final
+  // representation to lie in.
+  // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield
+  // numbers that are certain to lie in the interval. We will use this fact
+  // later on.
+  // We will now start by generating the digits within the uncertain
+  // interval. Later we will weed out representations that lie outside the safe
+  // interval and thus _might_ lie outside the correct interval.
+  uint64_t unit = 1;
+  DiyFp too_low = DiyFp(low.f() - unit, low.e());
+  DiyFp too_high = DiyFp(high.f() + unit, high.e());
+  // too_low and too_high are guaranteed to lie outside the interval we want the
+  // generated number in.
+  DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low);
+  // We now cut the input number into two parts: the integral digits and the
+  // fractionals. We will not write any decimal separator though, but adapt
+  // kappa instead.
+  // Reminder: we are currently computing the digits (stored inside the buffer)
+  // such that:   too_low < buffer * 10^kappa < too_high
+  // We use too_high for the digit_generation and stop as soon as possible.
+  // If we stop early we effectively round down.
+  DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
+  // Division by one is a shift.
+  uint32_t integrals = static_cast<uint32_t>(too_high.f() >> -one.e());
+  // Modulo by one is an and.
+  uint64_t fractionals = too_high.f() & (one.f() - 1);
+  uint32_t divisor;
+  int divisor_exponent_plus_one;
+  BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
+                  &divisor, &divisor_exponent_plus_one);
+  *kappa = divisor_exponent_plus_one;
+  *length = 0;
+  // Loop invariant: buffer = too_high / 10^kappa  (integer division)
+  // The invariant holds for the first iteration: kappa has been initialized
+  // with the divisor exponent + 1. And the divisor is the biggest power of ten
+  // that is smaller than integrals.
+  while (*kappa > 0) {
+    int digit = integrals / divisor;
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    integrals %= divisor;
+    (*kappa)--;
+    // Note that kappa now equals the exponent of the divisor and that the
+    // invariant thus holds again.
+    uint64_t rest =
+        (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
+    // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e())
+    // Reminder: unsafe_interval.e() == one.e()
+    if (rest < unsafe_interval.f()) {
+      // Rounding down (by not emitting the remaining digits) yields a number
+      // that lies within the unsafe interval.
+      return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(),
+                       unsafe_interval.f(), rest,
+                       static_cast<uint64_t>(divisor) << -one.e(), unit);
+    }
+    divisor /= 10;
+  }
+
+  // The integrals have been generated. We are at the point of the decimal
+  // separator. In the following loop we simply multiply the remaining digits by
+  // 10 and divide by one. We just need to pay attention to multiply associated
+  // data (like the interval or 'unit'), too.
+  // Note that the multiplication by 10 does not overflow, because w.e >= -60
+  // and thus one.e >= -60.
+  ASSERT(one.e() >= -60);
+  ASSERT(fractionals < one.f());
+  ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
+  while (true) {
+    fractionals *= 10;
+    unit *= 10;
+    unsafe_interval.set_f(unsafe_interval.f() * 10);
+    // Integer division by one.
+    int digit = static_cast<int>(fractionals >> -one.e());
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    fractionals &= one.f() - 1;  // Modulo by one.
+    (*kappa)--;
+    if (fractionals < unsafe_interval.f()) {
+      return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit,
+                       unsafe_interval.f(), fractionals, one.f(), unit);
+    }
+  }
+}
+
+
+
+// Generates (at most) requested_digits digits of input number w.
+// w is a floating-point number (DiyFp), consisting of a significand and an
+// exponent. Its exponent is bounded by kMinimalTargetExponent and
+// kMaximalTargetExponent.
+//       Hence -60 <= w.e() <= -32.
+//
+// Returns false if it fails, in which case the generated digits in the buffer
+// should not be used.
+// Preconditions:
+//  * w is correct up to 1 ulp (unit in the last place). That
+//    is, its error must be strictly less than a unit of its last digit.
+//  * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
+//
+// Postconditions: returns false if procedure fails.
+//   otherwise:
+//     * buffer is not null-terminated, but length contains the number of
+//       digits.
+//     * the representation in buffer is the most precise representation of
+//       requested_digits digits.
+//     * buffer contains at most requested_digits digits of w. If there are less
+//       than requested_digits digits then some trailing '0's have been removed.
+//     * kappa is such that
+//            w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2.
+//
+// Remark: This procedure takes into account the imprecision of its input
+//   numbers. If the precision is not enough to guarantee all the postconditions
+//   then false is returned. This usually happens rarely, but the failure-rate
+//   increases with higher requested_digits.
+static bool DigitGenCounted(DiyFp w,
+                            int requested_digits,
+                            Vector<char> buffer,
+                            int* length,
+                            int* kappa) {
+  ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
+  ASSERT(kMinimalTargetExponent >= -60);
+  ASSERT(kMaximalTargetExponent <= -32);
+  // w is assumed to have an error less than 1 unit. Whenever w is scaled we
+  // also scale its error.
+  uint64_t w_error = 1;
+  // We cut the input number into two parts: the integral digits and the
+  // fractional digits. We don't emit any decimal separator, but adapt kappa
+  // instead. Example: instead of writing "1.2" we put "12" into the buffer and
+  // increase kappa by 1.
+  DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
+  // Division by one is a shift.
+  uint32_t integrals = static_cast<uint32_t>(w.f() >> -one.e());
+  // Modulo by one is an and.
+  uint64_t fractionals = w.f() & (one.f() - 1);
+  uint32_t divisor;
+  int divisor_exponent_plus_one;
+  BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
+                  &divisor, &divisor_exponent_plus_one);
+  *kappa = divisor_exponent_plus_one;
+  *length = 0;
+
+  // Loop invariant: buffer = w / 10^kappa  (integer division)
+  // The invariant holds for the first iteration: kappa has been initialized
+  // with the divisor exponent + 1. And the divisor is the biggest power of ten
+  // that is smaller than 'integrals'.
+  while (*kappa > 0) {
+    int digit = integrals / divisor;
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    requested_digits--;
+    integrals %= divisor;
+    (*kappa)--;
+    // Note that kappa now equals the exponent of the divisor and that the
+    // invariant thus holds again.
+    if (requested_digits == 0) break;
+    divisor /= 10;
+  }
+
+  if (requested_digits == 0) {
+    uint64_t rest =
+        (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
+    return RoundWeedCounted(buffer, *length, rest,
+                            static_cast<uint64_t>(divisor) << -one.e(), w_error,
+                            kappa);
+  }
+
+  // The integrals have been generated. We are at the point of the decimal
+  // separator. In the following loop we simply multiply the remaining digits by
+  // 10 and divide by one. We just need to pay attention to multiply associated
+  // data (the 'unit'), too.
+  // Note that the multiplication by 10 does not overflow, because w.e >= -60
+  // and thus one.e >= -60.
+  ASSERT(one.e() >= -60);
+  ASSERT(fractionals < one.f());
+  ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
+  while (requested_digits > 0 && fractionals > w_error) {
+    fractionals *= 10;
+    w_error *= 10;
+    // Integer division by one.
+    int digit = static_cast<int>(fractionals >> -one.e());
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    requested_digits--;
+    fractionals &= one.f() - 1;  // Modulo by one.
+    (*kappa)--;
+  }
+  if (requested_digits != 0) return false;
+  return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error,
+                          kappa);
+}
+
+
+// Provides a decimal representation of v.
+// Returns true if it succeeds, otherwise the result cannot be trusted.
+// There will be *length digits inside the buffer (not null-terminated).
+// If the function returns true then
+//        v == (double) (buffer * 10^decimal_exponent).
+// The digits in the buffer are the shortest representation possible: no
+// 0.09999999999999999 instead of 0.1. The shorter representation will even be
+// chosen even if the longer one would be closer to v.
+// The last digit will be closest to the actual v. That is, even if several
+// digits might correctly yield 'v' when read again, the closest will be
+// computed.
+static bool Grisu3(double v,
+                   FastDtoaMode mode,
+                   Vector<char> buffer,
+                   int* length,
+                   int* decimal_exponent) {
+  DiyFp w = Double(v).AsNormalizedDiyFp();
+  // boundary_minus and boundary_plus are the boundaries between v and its
+  // closest floating-point neighbors. Any number strictly between
+  // boundary_minus and boundary_plus will round to v when convert to a double.
+  // Grisu3 will never output representations that lie exactly on a boundary.
+  DiyFp boundary_minus, boundary_plus;
+  if (mode == FAST_DTOA_SHORTEST) {
+    Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
+  } else {
+    ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE);
+    float single_v = static_cast<float>(v);
+    Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
+  }
+  ASSERT(boundary_plus.e() == w.e());
+  DiyFp ten_mk;  // Cached power of ten: 10^-k
+  int mk;        // -k
+  int ten_mk_minimal_binary_exponent =
+     kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  int ten_mk_maximal_binary_exponent =
+     kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+      ten_mk_minimal_binary_exponent,
+      ten_mk_maximal_binary_exponent,
+      &ten_mk, &mk);
+  ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize) &&
+         (kMaximalTargetExponent >= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize));
+  // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
+  // 64 bit significand and ten_mk is thus only precise up to 64 bits.
+
+  // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
+  // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now
+  // off by a small amount.
+  // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
+  // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
+  //           (f-1) * 2^e < w*10^k < (f+1) * 2^e
+  DiyFp scaled_w = DiyFp::Times(w, ten_mk);
+  ASSERT(scaled_w.e() ==
+         boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize);
+  // In theory it would be possible to avoid some recomputations by computing
+  // the difference between w and boundary_minus/plus (a power of 2) and to
+  // compute scaled_boundary_minus/plus by subtracting/adding from
+  // scaled_w. However the code becomes much less readable and the speed
+  // enhancements are not terriffic.
+  DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk);
+  DiyFp scaled_boundary_plus  = DiyFp::Times(boundary_plus,  ten_mk);
+
+  // DigitGen will generate the digits of scaled_w. Therefore we have
+  // v == (double) (scaled_w * 10^-mk).
+  // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an
+  // integer than it will be updated. For instance if scaled_w == 1.23 then
+  // the buffer will be filled with "123" und the decimal_exponent will be
+  // decreased by 2.
+  int kappa;
+  bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus,
+                         buffer, length, &kappa);
+  *decimal_exponent = -mk + kappa;
+  return result;
+}
+
+
+// The "counted" version of grisu3 (see above) only generates requested_digits
+// number of digits. This version does not generate the shortest representation,
+// and with enough requested digits 0.1 will at some point print as 0.9999999...
+// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and
+// therefore the rounding strategy for halfway cases is irrelevant.
+static bool Grisu3Counted(double v,
+                          int requested_digits,
+                          Vector<char> buffer,
+                          int* length,
+                          int* decimal_exponent) {
+  DiyFp w = Double(v).AsNormalizedDiyFp();
+  DiyFp ten_mk;  // Cached power of ten: 10^-k
+  int mk;        // -k
+  int ten_mk_minimal_binary_exponent =
+     kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  int ten_mk_maximal_binary_exponent =
+     kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+      ten_mk_minimal_binary_exponent,
+      ten_mk_maximal_binary_exponent,
+      &ten_mk, &mk);
+  ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize) &&
+         (kMaximalTargetExponent >= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize));
+  // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
+  // 64 bit significand and ten_mk is thus only precise up to 64 bits.
+
+  // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
+  // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now
+  // off by a small amount.
+  // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
+  // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
+  //           (f-1) * 2^e < w*10^k < (f+1) * 2^e
+  DiyFp scaled_w = DiyFp::Times(w, ten_mk);
+
+  // We now have (double) (scaled_w * 10^-mk).
+  // DigitGen will generate the first requested_digits digits of scaled_w and
+  // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It
+  // will not always be exactly the same since DigitGenCounted only produces a
+  // limited number of digits.)
+  int kappa;
+  bool result = DigitGenCounted(scaled_w, requested_digits,
+                                buffer, length, &kappa);
+  *decimal_exponent = -mk + kappa;
+  return result;
+}
+
+
+bool FastDtoa(double v,
+              FastDtoaMode mode,
+              int requested_digits,
+              Vector<char> buffer,
+              int* length,
+              int* decimal_point) {
+  ASSERT(v > 0);
+  ASSERT(!Double(v).IsSpecial());
+
+  bool result = false;
+  int decimal_exponent = 0;
+  switch (mode) {
+    case FAST_DTOA_SHORTEST:
+    case FAST_DTOA_SHORTEST_SINGLE:
+      result = Grisu3(v, mode, buffer, length, &decimal_exponent);
+      break;
+    case FAST_DTOA_PRECISION:
+      result = Grisu3Counted(v, requested_digits,
+                             buffer, length, &decimal_exponent);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  if (result) {
+    *decimal_point = *length + decimal_exponent;
+    buffer[*length] = '\0';
+  }
+  return result;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/fast-dtoa.h b/klm/util/double-conversion/fast-dtoa.h
new file mode 100644
index 00000000..5f1e8eee
--- /dev/null
+++ b/klm/util/double-conversion/fast-dtoa.h
@@ -0,0 +1,88 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_
+#define DOUBLE_CONVERSION_FAST_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+enum FastDtoaMode {
+  // Computes the shortest representation of the given input. The returned
+  // result will be the most accurate number of this length. Longer
+  // representations might be more accurate.
+  FAST_DTOA_SHORTEST,
+  // Same as FAST_DTOA_SHORTEST but for single-precision floats.
+  FAST_DTOA_SHORTEST_SINGLE,
+  // Computes a representation where the precision (number of digits) is
+  // given as input. The precision is independent of the decimal point.
+  FAST_DTOA_PRECISION
+};
+
+// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not
+// include the terminating '\0' character.
+static const int kFastDtoaMaximalLength = 17;
+// Same for single-precision numbers.
+static const int kFastDtoaMaximalSingleLength = 9;
+
+// Provides a decimal representation of v.
+// The result should be interpreted as buffer * 10^(point - length).
+//
+// Precondition:
+//   * v must be a strictly positive finite double.
+//
+// Returns true if it succeeds, otherwise the result can not be trusted.
+// There will be *length digits inside the buffer followed by a null terminator.
+// If the function returns true and mode equals
+//   - FAST_DTOA_SHORTEST, then
+//     the parameter requested_digits is ignored.
+//     The result satisfies
+//         v == (double) (buffer * 10^(point - length)).
+//     The digits in the buffer are the shortest representation possible. E.g.
+//     if 0.099999999999 and 0.1 represent the same double then "1" is returned
+//     with point = 0.
+//     The last digit will be closest to the actual v. That is, even if several
+//     digits might correctly yield 'v' when read again, the buffer will contain
+//     the one closest to v.
+//   - FAST_DTOA_PRECISION, then
+//     the buffer contains requested_digits digits.
+//     the difference v - (buffer * 10^(point-length)) is closest to zero for
+//     all possible representations of requested_digits digits.
+//     If there are two values that are equally close, then FastDtoa returns
+//     false.
+// For both modes the buffer must be large enough to hold the result.
+bool FastDtoa(double d,
+              FastDtoaMode mode,
+              int requested_digits,
+              Vector<char> buffer,
+              int* length,
+              int* decimal_point);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_FAST_DTOA_H_
diff --git a/klm/util/double-conversion/fixed-dtoa.cc b/klm/util/double-conversion/fixed-dtoa.cc
new file mode 100644
index 00000000..d56b1449
--- /dev/null
+++ b/klm/util/double-conversion/fixed-dtoa.cc
@@ -0,0 +1,402 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <math.h>
+
+#include "fixed-dtoa.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// Represents a 128bit type. This class should be replaced by a native type on
+// platforms that support 128bit integers.
+class UInt128 {
+ public:
+  UInt128() : high_bits_(0), low_bits_(0) { }
+  UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { }
+
+  void Multiply(uint32_t multiplicand) {
+    uint64_t accumulator;
+
+    accumulator = (low_bits_ & kMask32) * multiplicand;
+    uint32_t part = static_cast<uint32_t>(accumulator & kMask32);
+    accumulator >>= 32;
+    accumulator = accumulator + (low_bits_ >> 32) * multiplicand;
+    low_bits_ = (accumulator << 32) + part;
+    accumulator >>= 32;
+    accumulator = accumulator + (high_bits_ & kMask32) * multiplicand;
+    part = static_cast<uint32_t>(accumulator & kMask32);
+    accumulator >>= 32;
+    accumulator = accumulator + (high_bits_ >> 32) * multiplicand;
+    high_bits_ = (accumulator << 32) + part;
+    ASSERT((accumulator >> 32) == 0);
+  }
+
+  void Shift(int shift_amount) {
+    ASSERT(-64 <= shift_amount && shift_amount <= 64);
+    if (shift_amount == 0) {
+      return;
+    } else if (shift_amount == -64) {
+      high_bits_ = low_bits_;
+      low_bits_ = 0;
+    } else if (shift_amount == 64) {
+      low_bits_ = high_bits_;
+      high_bits_ = 0;
+    } else if (shift_amount <= 0) {
+      high_bits_ <<= -shift_amount;
+      high_bits_ += low_bits_ >> (64 + shift_amount);
+      low_bits_ <<= -shift_amount;
+    } else {
+      low_bits_ >>= shift_amount;
+      low_bits_ += high_bits_ << (64 - shift_amount);
+      high_bits_ >>= shift_amount;
+    }
+  }
+
+  // Modifies *this to *this MOD (2^power).
+  // Returns *this DIV (2^power).
+  int DivModPowerOf2(int power) {
+    if (power >= 64) {
+      int result = static_cast<int>(high_bits_ >> (power - 64));
+      high_bits_ -= static_cast<uint64_t>(result) << (power - 64);
+      return result;
+    } else {
+      uint64_t part_low = low_bits_ >> power;
+      uint64_t part_high = high_bits_ << (64 - power);
+      int result = static_cast<int>(part_low + part_high);
+      high_bits_ = 0;
+      low_bits_ -= part_low << power;
+      return result;
+    }
+  }
+
+  bool IsZero() const {
+    return high_bits_ == 0 && low_bits_ == 0;
+  }
+
+  int BitAt(int position) {
+    if (position >= 64) {
+      return static_cast<int>(high_bits_ >> (position - 64)) & 1;
+    } else {
+      return static_cast<int>(low_bits_ >> position) & 1;
+    }
+  }
+
+ private:
+  static const uint64_t kMask32 = 0xFFFFFFFF;
+  // Value == (high_bits_ << 64) + low_bits_
+  uint64_t high_bits_;
+  uint64_t low_bits_;
+};
+
+
+static const int kDoubleSignificandSize = 53;  // Includes the hidden bit.
+
+
+static void FillDigits32FixedLength(uint32_t number, int requested_length,
+                                    Vector<char> buffer, int* length) {
+  for (int i = requested_length - 1; i >= 0; --i) {
+    buffer[(*length) + i] = '0' + number % 10;
+    number /= 10;
+  }
+  *length += requested_length;
+}
+
+
+static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
+  int number_length = 0;
+  // We fill the digits in reverse order and exchange them afterwards.
+  while (number != 0) {
+    int digit = number % 10;
+    number /= 10;
+    buffer[(*length) + number_length] = '0' + digit;
+    number_length++;
+  }
+  // Exchange the digits.
+  int i = *length;
+  int j = *length + number_length - 1;
+  while (i < j) {
+    char tmp = buffer[i];
+    buffer[i] = buffer[j];
+    buffer[j] = tmp;
+    i++;
+    j--;
+  }
+  *length += number_length;
+}
+
+
+static void FillDigits64FixedLength(uint64_t number, int requested_length,
+                                    Vector<char> buffer, int* length) {
+  const uint32_t kTen7 = 10000000;
+  // For efficiency cut the number into 3 uint32_t parts, and print those.
+  uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+  number /= kTen7;
+  uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+  uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+  FillDigits32FixedLength(part0, 3, buffer, length);
+  FillDigits32FixedLength(part1, 7, buffer, length);
+  FillDigits32FixedLength(part2, 7, buffer, length);
+}
+
+
+static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) {
+  const uint32_t kTen7 = 10000000;
+  // For efficiency cut the number into 3 uint32_t parts, and print those.
+  uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+  number /= kTen7;
+  uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+  uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+  if (part0 != 0) {
+    FillDigits32(part0, buffer, length);
+    FillDigits32FixedLength(part1, 7, buffer, length);
+    FillDigits32FixedLength(part2, 7, buffer, length);
+  } else if (part1 != 0) {
+    FillDigits32(part1, buffer, length);
+    FillDigits32FixedLength(part2, 7, buffer, length);
+  } else {
+    FillDigits32(part2, buffer, length);
+  }
+}
+
+
+static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) {
+  // An empty buffer represents 0.
+  if (*length == 0) {
+    buffer[0] = '1';
+    *decimal_point = 1;
+    *length = 1;
+    return;
+  }
+  // Round the last digit until we either have a digit that was not '9' or until
+  // we reached the first digit.
+  buffer[(*length) - 1]++;
+  for (int i = (*length) - 1; i > 0; --i) {
+    if (buffer[i] != '0' + 10) {
+      return;
+    }
+    buffer[i] = '0';
+    buffer[i - 1]++;
+  }
+  // If the first digit is now '0' + 10, we would need to set it to '0' and add
+  // a '1' in front. However we reach the first digit only if all following
+  // digits had been '9' before rounding up. Now all trailing digits are '0' and
+  // we simply switch the first digit to '1' and update the decimal-point
+  // (indicating that the point is now one digit to the right).
+  if (buffer[0] == '0' + 10) {
+    buffer[0] = '1';
+    (*decimal_point)++;
+  }
+}
+
+
+// The given fractionals number represents a fixed-point number with binary
+// point at bit (-exponent).
+// Preconditions:
+//   -128 <= exponent <= 0.
+//   0 <= fractionals * 2^exponent < 1
+//   The buffer holds the result.
+// The function will round its result. During the rounding-process digits not
+// generated by this function might be updated, and the decimal-point variable
+// might be updated. If this function generates the digits 99 and the buffer
+// already contained "199" (thus yielding a buffer of "19999") then a
+// rounding-up will change the contents of the buffer to "20000".
+static void FillFractionals(uint64_t fractionals, int exponent,
+                            int fractional_count, Vector<char> buffer,
+                            int* length, int* decimal_point) {
+  ASSERT(-128 <= exponent && exponent <= 0);
+  // 'fractionals' is a fixed-point number, with binary point at bit
+  // (-exponent). Inside the function the non-converted remainder of fractionals
+  // is a fixed-point number, with binary point at bit 'point'.
+  if (-exponent <= 64) {
+    // One 64 bit number is sufficient.
+    ASSERT(fractionals >> 56 == 0);
+    int point = -exponent;
+    for (int i = 0; i < fractional_count; ++i) {
+      if (fractionals == 0) break;
+      // Instead of multiplying by 10 we multiply by 5 and adjust the point
+      // location. This way the fractionals variable will not overflow.
+      // Invariant at the beginning of the loop: fractionals < 2^point.
+      // Initially we have: point <= 64 and fractionals < 2^56
+      // After each iteration the point is decremented by one.
+      // Note that 5^3 = 125 < 128 = 2^7.
+      // Therefore three iterations of this loop will not overflow fractionals
+      // (even without the subtraction at the end of the loop body). At this
+      // time point will satisfy point <= 61 and therefore fractionals < 2^point
+      // and any further multiplication of fractionals by 5 will not overflow.
+      fractionals *= 5;
+      point--;
+      int digit = static_cast<int>(fractionals >> point);
+      buffer[*length] = '0' + digit;
+      (*length)++;
+      fractionals -= static_cast<uint64_t>(digit) << point;
+    }
+    // If the first bit after the point is set we have to round up.
+    if (((fractionals >> (point - 1)) & 1) == 1) {
+      RoundUp(buffer, length, decimal_point);
+    }
+  } else {  // We need 128 bits.
+    ASSERT(64 < -exponent && -exponent <= 128);
+    UInt128 fractionals128 = UInt128(fractionals, 0);
+    fractionals128.Shift(-exponent - 64);
+    int point = 128;
+    for (int i = 0; i < fractional_count; ++i) {
+      if (fractionals128.IsZero()) break;
+      // As before: instead of multiplying by 10 we multiply by 5 and adjust the
+      // point location.
+      // This multiplication will not overflow for the same reasons as before.
+      fractionals128.Multiply(5);
+      point--;
+      int digit = fractionals128.DivModPowerOf2(point);
+      buffer[*length] = '0' + digit;
+      (*length)++;
+    }
+    if (fractionals128.BitAt(point - 1) == 1) {
+      RoundUp(buffer, length, decimal_point);
+    }
+  }
+}
+
+
+// Removes leading and trailing zeros.
+// If leading zeros are removed then the decimal point position is adjusted.
+static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) {
+  while (*length > 0 && buffer[(*length) - 1] == '0') {
+    (*length)--;
+  }
+  int first_non_zero = 0;
+  while (first_non_zero < *length && buffer[first_non_zero] == '0') {
+    first_non_zero++;
+  }
+  if (first_non_zero != 0) {
+    for (int i = first_non_zero; i < *length; ++i) {
+      buffer[i - first_non_zero] = buffer[i];
+    }
+    *length -= first_non_zero;
+    *decimal_point -= first_non_zero;
+  }
+}
+
+
+bool FastFixedDtoa(double v,
+                   int fractional_count,
+                   Vector<char> buffer,
+                   int* length,
+                   int* decimal_point) {
+  const uint32_t kMaxUInt32 = 0xFFFFFFFF;
+  uint64_t significand = Double(v).Significand();
+  int exponent = Double(v).Exponent();
+  // v = significand * 2^exponent (with significand a 53bit integer).
+  // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we
+  // don't know how to compute the representation. 2^73 ~= 9.5*10^21.
+  // If necessary this limit could probably be increased, but we don't need
+  // more.
+  if (exponent > 20) return false;
+  if (fractional_count > 20) return false;
+  *length = 0;
+  // At most kDoubleSignificandSize bits of the significand are non-zero.
+  // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero
+  // bits:  0..11*..0xxx..53*..xx
+  if (exponent + kDoubleSignificandSize > 64) {
+    // The exponent must be > 11.
+    //
+    // We know that v = significand * 2^exponent.
+    // And the exponent > 11.
+    // We simplify the task by dividing v by 10^17.
+    // The quotient delivers the first digits, and the remainder fits into a 64
+    // bit number.
+    // Dividing by 10^17 is equivalent to dividing by 5^17*2^17.
+    const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5);  // 5^17
+    uint64_t divisor = kFive17;
+    int divisor_power = 17;
+    uint64_t dividend = significand;
+    uint32_t quotient;
+    uint64_t remainder;
+    // Let v = f * 2^e with f == significand and e == exponent.
+    // Then need q (quotient) and r (remainder) as follows:
+    //   v            = q * 10^17       + r
+    //   f * 2^e      = q * 10^17       + r
+    //   f * 2^e      = q * 5^17 * 2^17 + r
+    // If e > 17 then
+    //   f * 2^(e-17) = q * 5^17        + r/2^17
+    // else
+    //   f  = q * 5^17 * 2^(17-e) + r/2^e
+    if (exponent > divisor_power) {
+      // We only allow exponents of up to 20 and therefore (17 - e) <= 3
+      dividend <<= exponent - divisor_power;
+      quotient = static_cast<uint32_t>(dividend / divisor);
+      remainder = (dividend % divisor) << divisor_power;
+    } else {
+      divisor <<= divisor_power - exponent;
+      quotient = static_cast<uint32_t>(dividend / divisor);
+      remainder = (dividend % divisor) << exponent;
+    }
+    FillDigits32(quotient, buffer, length);
+    FillDigits64FixedLength(remainder, divisor_power, buffer, length);
+    *decimal_point = *length;
+  } else if (exponent >= 0) {
+    // 0 <= exponent <= 11
+    significand <<= exponent;
+    FillDigits64(significand, buffer, length);
+    *decimal_point = *length;
+  } else if (exponent > -kDoubleSignificandSize) {
+    // We have to cut the number.
+    uint64_t integrals = significand >> -exponent;
+    uint64_t fractionals = significand - (integrals << -exponent);
+    if (integrals > kMaxUInt32) {
+      FillDigits64(integrals, buffer, length);
+    } else {
+      FillDigits32(static_cast<uint32_t>(integrals), buffer, length);
+    }
+    *decimal_point = *length;
+    FillFractionals(fractionals, exponent, fractional_count,
+                    buffer, length, decimal_point);
+  } else if (exponent < -128) {
+    // This configuration (with at most 20 digits) means that all digits must be
+    // 0.
+    ASSERT(fractional_count <= 20);
+    buffer[0] = '\0';
+    *length = 0;
+    *decimal_point = -fractional_count;
+  } else {
+    *decimal_point = 0;
+    FillFractionals(significand, exponent, fractional_count,
+                    buffer, length, decimal_point);
+  }
+  TrimZeros(buffer, length, decimal_point);
+  buffer[*length] = '\0';
+  if ((*length) == 0) {
+    // The string is empty and the decimal_point thus has no importance. Mimick
+    // Gay's dtoa and and set it to -fractional_count.
+    *decimal_point = -fractional_count;
+  }
+  return true;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/fixed-dtoa.h b/klm/util/double-conversion/fixed-dtoa.h
new file mode 100644
index 00000000..3bdd08e2
--- /dev/null
+++ b/klm/util/double-conversion/fixed-dtoa.h
@@ -0,0 +1,56 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_
+#define DOUBLE_CONVERSION_FIXED_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// Produces digits necessary to print a given number with
+// 'fractional_count' digits after the decimal point.
+// The buffer must be big enough to hold the result plus one terminating null
+// character.
+//
+// The produced digits might be too short in which case the caller has to fill
+// the gaps with '0's.
+// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and
+// decimal_point = -2.
+// Halfway cases are rounded towards +/-Infinity (away from 0). The call
+// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0.
+// The returned buffer may contain digits that would be truncated from the
+// shortest representation of the input.
+//
+// This method only works for some parameters. If it can't handle the input it
+// returns false. The output is null-terminated when the function succeeds.
+bool FastFixedDtoa(double v, int fractional_count,
+                   Vector<char> buffer, int* length, int* decimal_point);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_FIXED_DTOA_H_
diff --git a/klm/util/double-conversion/ieee.h b/klm/util/double-conversion/ieee.h
new file mode 100644
index 00000000..839dc47d
--- /dev/null
+++ b/klm/util/double-conversion/ieee.h
@@ -0,0 +1,398 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DOUBLE_H_
+#define DOUBLE_CONVERSION_DOUBLE_H_
+
+#include "diy-fp.h"
+
+namespace double_conversion {
+
+// We assume that doubles and uint64_t have the same endianness.
+static uint64_t double_to_uint64(double d) { return BitCast<uint64_t>(d); }
+static double uint64_to_double(uint64_t d64) { return BitCast<double>(d64); }
+static uint32_t float_to_uint32(float f) { return BitCast<uint32_t>(f); }
+static float uint32_to_float(uint32_t d32) { return BitCast<float>(d32); }
+
+// Helper functions for doubles.
+class Double {
+ public:
+  static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);
+  static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);
+  static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
+  static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);
+  static const int kPhysicalSignificandSize = 52;  // Excludes the hidden bit.
+  static const int kSignificandSize = 53;
+
+  Double() : d64_(0) {}
+  explicit Double(double d) : d64_(double_to_uint64(d)) {}
+  explicit Double(uint64_t d64) : d64_(d64) {}
+  explicit Double(DiyFp diy_fp)
+    : d64_(DiyFpToUint64(diy_fp)) {}
+
+  // The value encoded by this Double must be greater or equal to +0.0.
+  // It must not be special (infinity, or NaN).
+  DiyFp AsDiyFp() const {
+    ASSERT(Sign() > 0);
+    ASSERT(!IsSpecial());
+    return DiyFp(Significand(), Exponent());
+  }
+
+  // The value encoded by this Double must be strictly greater than 0.
+  DiyFp AsNormalizedDiyFp() const {
+    ASSERT(value() > 0.0);
+    uint64_t f = Significand();
+    int e = Exponent();
+
+    // The current double could be a denormal.
+    while ((f & kHiddenBit) == 0) {
+      f <<= 1;
+      e--;
+    }
+    // Do the final shifts in one go.
+    f <<= DiyFp::kSignificandSize - kSignificandSize;
+    e -= DiyFp::kSignificandSize - kSignificandSize;
+    return DiyFp(f, e);
+  }
+
+  // Returns the double's bit as uint64.
+  uint64_t AsUint64() const {
+    return d64_;
+  }
+
+  // Returns the next greater double. Returns +infinity on input +infinity.
+  double NextDouble() const {
+    if (d64_ == kInfinity) return Double(kInfinity).value();
+    if (Sign() < 0 && Significand() == 0) {
+      // -0.0
+      return 0.0;
+    }
+    if (Sign() < 0) {
+      return Double(d64_ - 1).value();
+    } else {
+      return Double(d64_ + 1).value();
+    }
+  }
+
+  double PreviousDouble() const {
+    if (d64_ == (kInfinity | kSignMask)) return -Double::Infinity();
+    if (Sign() < 0) {
+      return Double(d64_ + 1).value();
+    } else {
+      if (Significand() == 0) return -0.0;
+      return Double(d64_ - 1).value();
+    }
+  }
+
+  int Exponent() const {
+    if (IsDenormal()) return kDenormalExponent;
+
+    uint64_t d64 = AsUint64();
+    int biased_e =
+        static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
+    return biased_e - kExponentBias;
+  }
+
+  uint64_t Significand() const {
+    uint64_t d64 = AsUint64();
+    uint64_t significand = d64 & kSignificandMask;
+    if (!IsDenormal()) {
+      return significand + kHiddenBit;
+    } else {
+      return significand;
+    }
+  }
+
+  // Returns true if the double is a denormal.
+  bool IsDenormal() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kExponentMask) == 0;
+  }
+
+  // We consider denormals not to be special.
+  // Hence only Infinity and NaN are special.
+  bool IsSpecial() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kExponentMask) == kExponentMask;
+  }
+
+  bool IsNan() const {
+    uint64_t d64 = AsUint64();
+    return ((d64 & kExponentMask) == kExponentMask) &&
+        ((d64 & kSignificandMask) != 0);
+  }
+
+  bool IsInfinite() const {
+    uint64_t d64 = AsUint64();
+    return ((d64 & kExponentMask) == kExponentMask) &&
+        ((d64 & kSignificandMask) == 0);
+  }
+
+  int Sign() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kSignMask) == 0? 1: -1;
+  }
+
+  // Precondition: the value encoded by this Double must be greater or equal
+  // than +0.0.
+  DiyFp UpperBoundary() const {
+    ASSERT(Sign() > 0);
+    return DiyFp(Significand() * 2 + 1, Exponent() - 1);
+  }
+
+  // Computes the two boundaries of this.
+  // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+  // exponent as m_plus.
+  // Precondition: the value encoded by this Double must be greater than 0.
+  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+    ASSERT(value() > 0.0);
+    DiyFp v = this->AsDiyFp();
+    DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+    DiyFp m_minus;
+    if (LowerBoundaryIsCloser()) {
+      m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+    } else {
+      m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+    }
+    m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
+    m_minus.set_e(m_plus.e());
+    *out_m_plus = m_plus;
+    *out_m_minus = m_minus;
+  }
+
+  bool LowerBoundaryIsCloser() const {
+    // The boundary is closer if the significand is of the form f == 2^p-1 then
+    // the lower boundary is closer.
+    // Think of v = 1000e10 and v- = 9999e9.
+    // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+    // at a distance of 1e8.
+    // The only exception is for the smallest normal: the largest denormal is
+    // at the same distance as its successor.
+    // Note: denormals have the same exponent as the smallest normals.
+    bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
+    return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+  }
+
+  double value() const { return uint64_to_double(d64_); }
+
+  // Returns the significand size for a given order of magnitude.
+  // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
+  // This function returns the number of significant binary digits v will have
+  // once it's encoded into a double. In almost all cases this is equal to
+  // kSignificandSize. The only exceptions are denormals. They start with
+  // leading zeroes and their effective significand-size is hence smaller.
+  static int SignificandSizeForOrderOfMagnitude(int order) {
+    if (order >= (kDenormalExponent + kSignificandSize)) {
+      return kSignificandSize;
+    }
+    if (order <= kDenormalExponent) return 0;
+    return order - kDenormalExponent;
+  }
+
+  static double Infinity() {
+    return Double(kInfinity).value();
+  }
+
+  static double NaN() {
+    return Double(kNaN).value();
+  }
+
+ private:
+  static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
+  static const int kDenormalExponent = -kExponentBias + 1;
+  static const int kMaxExponent = 0x7FF - kExponentBias;
+  static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
+  static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);
+
+  const uint64_t d64_;
+
+  static uint64_t DiyFpToUint64(DiyFp diy_fp) {
+    uint64_t significand = diy_fp.f();
+    int exponent = diy_fp.e();
+    while (significand > kHiddenBit + kSignificandMask) {
+      significand >>= 1;
+      exponent++;
+    }
+    if (exponent >= kMaxExponent) {
+      return kInfinity;
+    }
+    if (exponent < kDenormalExponent) {
+      return 0;
+    }
+    while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
+      significand <<= 1;
+      exponent--;
+    }
+    uint64_t biased_exponent;
+    if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
+      biased_exponent = 0;
+    } else {
+      biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
+    }
+    return (significand & kSignificandMask) |
+        (biased_exponent << kPhysicalSignificandSize);
+  }
+};
+
+class Single {
+ public:
+  static const uint32_t kSignMask = 0x80000000;
+  static const uint32_t kExponentMask = 0x7F800000;
+  static const uint32_t kSignificandMask = 0x007FFFFF;
+  static const uint32_t kHiddenBit = 0x00800000;
+  static const int kPhysicalSignificandSize = 23;  // Excludes the hidden bit.
+  static const int kSignificandSize = 24;
+
+  Single() : d32_(0) {}
+  explicit Single(float f) : d32_(float_to_uint32(f)) {}
+  explicit Single(uint32_t d32) : d32_(d32) {}
+
+  // The value encoded by this Single must be greater or equal to +0.0.
+  // It must not be special (infinity, or NaN).
+  DiyFp AsDiyFp() const {
+    ASSERT(Sign() > 0);
+    ASSERT(!IsSpecial());
+    return DiyFp(Significand(), Exponent());
+  }
+
+  // Returns the single's bit as uint64.
+  uint32_t AsUint32() const {
+    return d32_;
+  }
+
+  int Exponent() const {
+    if (IsDenormal()) return kDenormalExponent;
+
+    uint32_t d32 = AsUint32();
+    int biased_e =
+        static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize);
+    return biased_e - kExponentBias;
+  }
+
+  uint32_t Significand() const {
+    uint32_t d32 = AsUint32();
+    uint32_t significand = d32 & kSignificandMask;
+    if (!IsDenormal()) {
+      return significand + kHiddenBit;
+    } else {
+      return significand;
+    }
+  }
+
+  // Returns true if the single is a denormal.
+  bool IsDenormal() const {
+    uint32_t d32 = AsUint32();
+    return (d32 & kExponentMask) == 0;
+  }
+
+  // We consider denormals not to be special.
+  // Hence only Infinity and NaN are special.
+  bool IsSpecial() const {
+    uint32_t d32 = AsUint32();
+    return (d32 & kExponentMask) == kExponentMask;
+  }
+
+  bool IsNan() const {
+    uint32_t d32 = AsUint32();
+    return ((d32 & kExponentMask) == kExponentMask) &&
+        ((d32 & kSignificandMask) != 0);
+  }
+
+  bool IsInfinite() const {
+    uint32_t d32 = AsUint32();
+    return ((d32 & kExponentMask) == kExponentMask) &&
+        ((d32 & kSignificandMask) == 0);
+  }
+
+  int Sign() const {
+    uint32_t d32 = AsUint32();
+    return (d32 & kSignMask) == 0? 1: -1;
+  }
+
+  // Computes the two boundaries of this.
+  // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+  // exponent as m_plus.
+  // Precondition: the value encoded by this Single must be greater than 0.
+  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+    ASSERT(value() > 0.0);
+    DiyFp v = this->AsDiyFp();
+    DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+    DiyFp m_minus;
+    if (LowerBoundaryIsCloser()) {
+      m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+    } else {
+      m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+    }
+    m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
+    m_minus.set_e(m_plus.e());
+    *out_m_plus = m_plus;
+    *out_m_minus = m_minus;
+  }
+
+  // Precondition: the value encoded by this Single must be greater or equal
+  // than +0.0.
+  DiyFp UpperBoundary() const {
+    ASSERT(Sign() > 0);
+    return DiyFp(Significand() * 2 + 1, Exponent() - 1);
+  }
+
+  bool LowerBoundaryIsCloser() const {
+    // The boundary is closer if the significand is of the form f == 2^p-1 then
+    // the lower boundary is closer.
+    // Think of v = 1000e10 and v- = 9999e9.
+    // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+    // at a distance of 1e8.
+    // The only exception is for the smallest normal: the largest denormal is
+    // at the same distance as its successor.
+    // Note: denormals have the same exponent as the smallest normals.
+    bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0);
+    return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+  }
+
+  float value() const { return uint32_to_float(d32_); }
+
+  static float Infinity() {
+    return Single(kInfinity).value();
+  }
+
+  static float NaN() {
+    return Single(kNaN).value();
+  }
+
+ private:
+  static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
+  static const int kDenormalExponent = -kExponentBias + 1;
+  static const int kMaxExponent = 0xFF - kExponentBias;
+  static const uint32_t kInfinity = 0x7F800000;
+  static const uint32_t kNaN = 0x7FC00000;
+
+  const uint32_t d32_;
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_DOUBLE_H_
diff --git a/klm/util/double-conversion/strtod.cc b/klm/util/double-conversion/strtod.cc
new file mode 100644
index 00000000..e298766a
--- /dev/null
+++ b/klm/util/double-conversion/strtod.cc
@@ -0,0 +1,558 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdarg.h>
+#include <limits.h>
+
+#include "strtod.h"
+#include "bignum.h"
+#include "cached-powers.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// 2^53 = 9007199254740992.
+// Any integer with at most 15 decimal digits will hence fit into a double
+// (which has a 53bit significand) without loss of precision.
+static const int kMaxExactDoubleIntegerDecimalDigits = 15;
+// 2^64 = 18446744073709551616 > 10^19
+static const int kMaxUint64DecimalDigits = 19;
+
+// Max double: 1.7976931348623157 x 10^308
+// Min non-zero double: 4.9406564584124654 x 10^-324
+// Any x >= 10^309 is interpreted as +infinity.
+// Any x <= 10^-324 is interpreted as 0.
+// Note that 2.5e-324 (despite being smaller than the min double) will be read
+// as non-zero (equal to the min non-zero double).
+static const int kMaxDecimalPower = 309;
+static const int kMinDecimalPower = -324;
+
+// 2^64 = 18446744073709551616
+static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
+
+
+static const double exact_powers_of_ten[] = {
+  1.0,  // 10^0
+  10.0,
+  100.0,
+  1000.0,
+  10000.0,
+  100000.0,
+  1000000.0,
+  10000000.0,
+  100000000.0,
+  1000000000.0,
+  10000000000.0,  // 10^10
+  100000000000.0,
+  1000000000000.0,
+  10000000000000.0,
+  100000000000000.0,
+  1000000000000000.0,
+  10000000000000000.0,
+  100000000000000000.0,
+  1000000000000000000.0,
+  10000000000000000000.0,
+  100000000000000000000.0,  // 10^20
+  1000000000000000000000.0,
+  // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
+  10000000000000000000000.0
+};
+static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
+
+// Maximum number of significant digits in the decimal representation.
+// In fact the value is 772 (see conversions.cc), but to give us some margin
+// we round up to 780.
+static const int kMaxSignificantDecimalDigits = 780;
+
+static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
+  for (int i = 0; i < buffer.length(); i++) {
+    if (buffer[i] != '0') {
+      return buffer.SubVector(i, buffer.length());
+    }
+  }
+  return Vector<const char>(buffer.start(), 0);
+}
+
+
+static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
+  for (int i = buffer.length() - 1; i >= 0; --i) {
+    if (buffer[i] != '0') {
+      return buffer.SubVector(0, i + 1);
+    }
+  }
+  return Vector<const char>(buffer.start(), 0);
+}
+
+
+static void CutToMaxSignificantDigits(Vector<const char> buffer,
+                                       int exponent,
+                                       char* significant_buffer,
+                                       int* significant_exponent) {
+  for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
+    significant_buffer[i] = buffer[i];
+  }
+  // The input buffer has been trimmed. Therefore the last digit must be
+  // different from '0'.
+  ASSERT(buffer[buffer.length() - 1] != '0');
+  // Set the last digit to be non-zero. This is sufficient to guarantee
+  // correct rounding.
+  significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
+  *significant_exponent =
+      exponent + (buffer.length() - kMaxSignificantDecimalDigits);
+}
+
+
+// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
+// If possible the input-buffer is reused, but if the buffer needs to be
+// modified (due to cutting), then the input needs to be copied into the
+// buffer_copy_space.
+static void TrimAndCut(Vector<const char> buffer, int exponent,
+                       char* buffer_copy_space, int space_size,
+                       Vector<const char>* trimmed, int* updated_exponent) {
+  Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
+  Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
+  exponent += left_trimmed.length() - right_trimmed.length();
+  if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
+    ASSERT(space_size >= kMaxSignificantDecimalDigits);
+    CutToMaxSignificantDigits(right_trimmed, exponent,
+                              buffer_copy_space, updated_exponent);
+    *trimmed = Vector<const char>(buffer_copy_space,
+                                 kMaxSignificantDecimalDigits);
+  } else {
+    *trimmed = right_trimmed;
+    *updated_exponent = exponent;
+  }
+}
+
+
+// Reads digits from the buffer and converts them to a uint64.
+// Reads in as many digits as fit into a uint64.
+// When the string starts with "1844674407370955161" no further digit is read.
+// Since 2^64 = 18446744073709551616 it would still be possible read another
+// digit if it was less or equal than 6, but this would complicate the code.
+static uint64_t ReadUint64(Vector<const char> buffer,
+                           int* number_of_read_digits) {
+  uint64_t result = 0;
+  int i = 0;
+  while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
+    int digit = buffer[i++] - '0';
+    ASSERT(0 <= digit && digit <= 9);
+    result = 10 * result + digit;
+  }
+  *number_of_read_digits = i;
+  return result;
+}
+
+
+// Reads a DiyFp from the buffer.
+// The returned DiyFp is not necessarily normalized.
+// If remaining_decimals is zero then the returned DiyFp is accurate.
+// Otherwise it has been rounded and has error of at most 1/2 ulp.
+static void ReadDiyFp(Vector<const char> buffer,
+                      DiyFp* result,
+                      int* remaining_decimals) {
+  int read_digits;
+  uint64_t significand = ReadUint64(buffer, &read_digits);
+  if (buffer.length() == read_digits) {
+    *result = DiyFp(significand, 0);
+    *remaining_decimals = 0;
+  } else {
+    // Round the significand.
+    if (buffer[read_digits] >= '5') {
+      significand++;
+    }
+    // Compute the binary exponent.
+    int exponent = 0;
+    *result = DiyFp(significand, exponent);
+    *remaining_decimals = buffer.length() - read_digits;
+  }
+}
+
+
+static bool DoubleStrtod(Vector<const char> trimmed,
+                         int exponent,
+                         double* result) {
+#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
+  // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
+  // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
+  // result is not accurate.
+  // We know that Windows32 uses 64 bits and is therefore accurate.
+  // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
+  // the same problem.
+  return false;
+#endif
+  if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
+    int read_digits;
+    // The trimmed input fits into a double.
+    // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
+    // can compute the result-double simply by multiplying (resp. dividing) the
+    // two numbers.
+    // This is possible because IEEE guarantees that floating-point operations
+    // return the best possible approximation.
+    if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
+      // 10^-exponent fits into a double.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result /= exact_powers_of_ten[-exponent];
+      return true;
+    }
+    if (0 <= exponent && exponent < kExactPowersOfTenSize) {
+      // 10^exponent fits into a double.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result *= exact_powers_of_ten[exponent];
+      return true;
+    }
+    int remaining_digits =
+        kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
+    if ((0 <= exponent) &&
+        (exponent - remaining_digits < kExactPowersOfTenSize)) {
+      // The trimmed string was short and we can multiply it with
+      // 10^remaining_digits. As a result the remaining exponent now fits
+      // into a double too.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result *= exact_powers_of_ten[remaining_digits];
+      *result *= exact_powers_of_ten[exponent - remaining_digits];
+      return true;
+    }
+  }
+  return false;
+}
+
+
+// Returns 10^exponent as an exact DiyFp.
+// The given exponent must be in the range [1; kDecimalExponentDistance[.
+static DiyFp AdjustmentPowerOfTen(int exponent) {
+  ASSERT(0 < exponent);
+  ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
+  // Simply hardcode the remaining powers for the given decimal exponent
+  // distance.
+  ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
+  switch (exponent) {
+    case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
+    case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
+    case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
+    case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
+    case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
+    case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
+    case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
+    default:
+      UNREACHABLE();
+      return DiyFp(0, 0);
+  }
+}
+
+
+// If the function returns true then the result is the correct double.
+// Otherwise it is either the correct double or the double that is just below
+// the correct double.
+static bool DiyFpStrtod(Vector<const char> buffer,
+                        int exponent,
+                        double* result) {
+  DiyFp input;
+  int remaining_decimals;
+  ReadDiyFp(buffer, &input, &remaining_decimals);
+  // Since we may have dropped some digits the input is not accurate.
+  // If remaining_decimals is different than 0 than the error is at most
+  // .5 ulp (unit in the last place).
+  // We don't want to deal with fractions and therefore keep a common
+  // denominator.
+  const int kDenominatorLog = 3;
+  const int kDenominator = 1 << kDenominatorLog;
+  // Move the remaining decimals into the exponent.
+  exponent += remaining_decimals;
+  int error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
+
+  int old_e = input.e();
+  input.Normalize();
+  error <<= old_e - input.e();
+
+  ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
+  if (exponent < PowersOfTenCache::kMinDecimalExponent) {
+    *result = 0.0;
+    return true;
+  }
+  DiyFp cached_power;
+  int cached_decimal_exponent;
+  PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
+                                                     &cached_power,
+                                                     &cached_decimal_exponent);
+
+  if (cached_decimal_exponent != exponent) {
+    int adjustment_exponent = exponent - cached_decimal_exponent;
+    DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
+    input.Multiply(adjustment_power);
+    if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
+      // The product of input with the adjustment power fits into a 64 bit
+      // integer.
+      ASSERT(DiyFp::kSignificandSize == 64);
+    } else {
+      // The adjustment power is exact. There is hence only an error of 0.5.
+      error += kDenominator / 2;
+    }
+  }
+
+  input.Multiply(cached_power);
+  // The error introduced by a multiplication of a*b equals
+  //   error_a + error_b + error_a*error_b/2^64 + 0.5
+  // Substituting a with 'input' and b with 'cached_power' we have
+  //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
+  //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
+  int error_b = kDenominator / 2;
+  int error_ab = (error == 0 ? 0 : 1);  // We round up to 1.
+  int fixed_error = kDenominator / 2;
+  error += error_b + error_ab + fixed_error;
+
+  old_e = input.e();
+  input.Normalize();
+  error <<= old_e - input.e();
+
+  // See if the double's significand changes if we add/subtract the error.
+  int order_of_magnitude = DiyFp::kSignificandSize + input.e();
+  int effective_significand_size =
+      Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
+  int precision_digits_count =
+      DiyFp::kSignificandSize - effective_significand_size;
+  if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
+    // This can only happen for very small denormals. In this case the
+    // half-way multiplied by the denominator exceeds the range of an uint64.
+    // Simply shift everything to the right.
+    int shift_amount = (precision_digits_count + kDenominatorLog) -
+        DiyFp::kSignificandSize + 1;
+    input.set_f(input.f() >> shift_amount);
+    input.set_e(input.e() + shift_amount);
+    // We add 1 for the lost precision of error, and kDenominator for
+    // the lost precision of input.f().
+    error = (error >> shift_amount) + 1 + kDenominator;
+    precision_digits_count -= shift_amount;
+  }
+  // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
+  ASSERT(DiyFp::kSignificandSize == 64);
+  ASSERT(precision_digits_count < 64);
+  uint64_t one64 = 1;
+  uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
+  uint64_t precision_bits = input.f() & precision_bits_mask;
+  uint64_t half_way = one64 << (precision_digits_count - 1);
+  precision_bits *= kDenominator;
+  half_way *= kDenominator;
+  DiyFp rounded_input(input.f() >> precision_digits_count,
+                      input.e() + precision_digits_count);
+  if (precision_bits >= half_way + error) {
+    rounded_input.set_f(rounded_input.f() + 1);
+  }
+  // If the last_bits are too close to the half-way case than we are too
+  // inaccurate and round down. In this case we return false so that we can
+  // fall back to a more precise algorithm.
+
+  *result = Double(rounded_input).value();
+  if (half_way - error < precision_bits && precision_bits < half_way + error) {
+    // Too imprecise. The caller will have to fall back to a slower version.
+    // However the returned number is guaranteed to be either the correct
+    // double, or the next-lower double.
+    return false;
+  } else {
+    return true;
+  }
+}
+
+
+// Returns
+//   - -1 if buffer*10^exponent < diy_fp.
+//   -  0 if buffer*10^exponent == diy_fp.
+//   - +1 if buffer*10^exponent > diy_fp.
+// Preconditions:
+//   buffer.length() + exponent <= kMaxDecimalPower + 1
+//   buffer.length() + exponent > kMinDecimalPower
+//   buffer.length() <= kMaxDecimalSignificantDigits
+static int CompareBufferWithDiyFp(Vector<const char> buffer,
+                                  int exponent,
+                                  DiyFp diy_fp) {
+  ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
+  ASSERT(buffer.length() + exponent > kMinDecimalPower);
+  ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
+  // Make sure that the Bignum will be able to hold all our numbers.
+  // Our Bignum implementation has a separate field for exponents. Shifts will
+  // consume at most one bigit (< 64 bits).
+  // ln(10) == 3.3219...
+  ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
+  Bignum buffer_bignum;
+  Bignum diy_fp_bignum;
+  buffer_bignum.AssignDecimalString(buffer);
+  diy_fp_bignum.AssignUInt64(diy_fp.f());
+  if (exponent >= 0) {
+    buffer_bignum.MultiplyByPowerOfTen(exponent);
+  } else {
+    diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
+  }
+  if (diy_fp.e() > 0) {
+    diy_fp_bignum.ShiftLeft(diy_fp.e());
+  } else {
+    buffer_bignum.ShiftLeft(-diy_fp.e());
+  }
+  return Bignum::Compare(buffer_bignum, diy_fp_bignum);
+}
+
+
+// Returns true if the guess is the correct double.
+// Returns false, when guess is either correct or the next-lower double.
+static bool ComputeGuess(Vector<const char> trimmed, int exponent,
+                         double* guess) {
+  if (trimmed.length() == 0) {
+    *guess = 0.0;
+    return true;
+  }
+  if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
+    *guess = Double::Infinity();
+    return true;
+  }
+  if (exponent + trimmed.length() <= kMinDecimalPower) {
+    *guess = 0.0;
+    return true;
+  }
+
+  if (DoubleStrtod(trimmed, exponent, guess) ||
+      DiyFpStrtod(trimmed, exponent, guess)) {
+    return true;
+  }
+  if (*guess == Double::Infinity()) {
+    return true;
+  }
+  return false;
+}
+
+double Strtod(Vector<const char> buffer, int exponent) {
+  char copy_buffer[kMaxSignificantDecimalDigits];
+  Vector<const char> trimmed;
+  int updated_exponent;
+  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+             &trimmed, &updated_exponent);
+  exponent = updated_exponent;
+
+  double guess;
+  bool is_correct = ComputeGuess(trimmed, exponent, &guess);
+  if (is_correct) return guess;
+
+  DiyFp upper_boundary = Double(guess).UpperBoundary();
+  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+  if (comparison < 0) {
+    return guess;
+  } else if (comparison > 0) {
+    return Double(guess).NextDouble();
+  } else if ((Double(guess).Significand() & 1) == 0) {
+    // Round towards even.
+    return guess;
+  } else {
+    return Double(guess).NextDouble();
+  }
+}
+
+float Strtof(Vector<const char> buffer, int exponent) {
+  char copy_buffer[kMaxSignificantDecimalDigits];
+  Vector<const char> trimmed;
+  int updated_exponent;
+  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+             &trimmed, &updated_exponent);
+  exponent = updated_exponent;
+
+  double double_guess;
+  bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
+
+  float float_guess = static_cast<float>(double_guess);
+  if (float_guess == double_guess) {
+    // This shortcut triggers for integer values.
+    return float_guess;
+  }
+
+  // We must catch double-rounding. Say the double has been rounded up, and is
+  // now a boundary of a float, and rounds up again. This is why we have to
+  // look at previous too.
+  // Example (in decimal numbers):
+  //    input: 12349
+  //    high-precision (4 digits): 1235
+  //    low-precision (3 digits):
+  //       when read from input: 123
+  //       when rounded from high precision: 124.
+  // To do this we simply look at the neigbors of the correct result and see
+  // if they would round to the same float. If the guess is not correct we have
+  // to look at four values (since two different doubles could be the correct
+  // double).
+
+  double double_next = Double(double_guess).NextDouble();
+  double double_previous = Double(double_guess).PreviousDouble();
+
+  float f1 = static_cast<float>(double_previous);
+#ifndef NDEBUG
+  float f2 = float_guess;
+#endif
+  float f3 = static_cast<float>(double_next);
+  float f4;
+  if (is_correct) {
+    f4 = f3;
+  } else {
+    double double_next2 = Double(double_next).NextDouble();
+    f4 = static_cast<float>(double_next2);
+  }
+#ifndef NDEBUG
+  ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
+#endif
+
+  // If the guess doesn't lie near a single-precision boundary we can simply
+  // return its float-value.
+  if (f1 == f4) {
+    return float_guess;
+  }
+
+  ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
+         (f1 == f2 && f2 != f3 && f3 == f4) ||
+         (f1 == f2 && f2 == f3 && f3 != f4));
+
+  // guess and next are the two possible canditates (in the same way that
+  // double_guess was the lower candidate for a double-precision guess).
+  float guess = f1;
+  float next = f4;
+  DiyFp upper_boundary;
+  if (guess == 0.0f) {
+    float min_float = 1e-45f;
+    upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
+  } else {
+    upper_boundary = Single(guess).UpperBoundary();
+  }
+  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+  if (comparison < 0) {
+    return guess;
+  } else if (comparison > 0) {
+    return next;
+  } else if ((Single(guess).Significand() & 1) == 0) {
+    // Round towards even.
+    return guess;
+  } else {
+    return next;
+  }
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/strtod.h b/klm/util/double-conversion/strtod.h
new file mode 100644
index 00000000..ed0293b8
--- /dev/null
+++ b/klm/util/double-conversion/strtod.h
@@ -0,0 +1,45 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_STRTOD_H_
+#define DOUBLE_CONVERSION_STRTOD_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+double Strtod(Vector<const char> buffer, int exponent);
+
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+float Strtof(Vector<const char> buffer, int exponent);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_STRTOD_H_
diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h
new file mode 100644
index 00000000..767094b8
--- /dev/null
+++ b/klm/util/double-conversion/utils.h
@@ -0,0 +1,313 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_UTILS_H_
+#define DOUBLE_CONVERSION_UTILS_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <assert.h>
+#ifndef ASSERT
+#define ASSERT(condition)      (assert(condition))
+#endif
+#ifndef UNIMPLEMENTED
+#define UNIMPLEMENTED() (abort())
+#endif
+#ifndef UNREACHABLE
+#define UNREACHABLE()   (abort())
+#endif
+
+// Double operations detection based on target architecture.
+// Linux uses a 80bit wide floating point stack on x86. This induces double
+// rounding, which in turn leads to wrong results.
+// An easy way to test if the floating-point operations are correct is to
+// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
+// the result is equal to 89255e-22.
+// The best way to test this, is to create a division-function and to compare
+// the output of the division with the expected result. (Inlining must be
+// disabled.)
+// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
+#if defined(_M_X64) || defined(__x86_64__) || \
+    defined(__ARMEL__) || defined(__avr32__) || \
+    defined(__hppa__) || defined(__ia64__) || \
+    defined(__mips__) || defined(__powerpc__) || \
+    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
+    defined(__SH4__) || defined(__alpha__) || \
+    defined(_MIPS_ARCH_MIPS32R2)
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
+#if defined(_WIN32)
+// Windows uses a 64bit wide floating point stack.
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#else
+#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
+#endif  // _WIN32
+#else
+#error Target architecture was not detected as supported by Double-Conversion.
+#endif
+
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t;  // NOLINT
+typedef unsigned short uint16_t;  // NOLINT
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+// intptr_t and friends are defined in crtdefs.h through stdio.h.
+
+#else
+
+#include <stdint.h>
+
+#endif
+
+// The following macro works on both 32 and 64-bit platforms.
+// Usage: instead of writing 0x1234567890123456
+//      write UINT64_2PART_C(0x12345678,90123456);
+#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
+
+
+// The expression ARRAY_SIZE(a) is a compile-time constant of type
+// size_t which represents the number of elements of the given
+// array. You should only use ARRAY_SIZE on statically allocated
+// arrays.
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a)                                   \
+  ((sizeof(a) / sizeof(*(a))) /                         \
+  static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
+#endif
+
+// A macro to disallow the evil copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#ifndef DISALLOW_COPY_AND_ASSIGN
+#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
+  TypeName(const TypeName&);                    \
+  void operator=(const TypeName&)
+#endif
+
+// A macro to disallow all the implicit constructors, namely the
+// default constructor, copy constructor and operator= functions.
+//
+// This should be used in the private: declarations for a class
+// that wants to prevent anyone from instantiating it. This is
+// especially useful for classes containing only static methods.
+#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+  TypeName();                                    \
+  DISALLOW_COPY_AND_ASSIGN(TypeName)
+#endif
+
+namespace double_conversion {
+
+static const int kCharSize = sizeof(char);
+
+// Returns the maximum of the two parameters.
+template <typename T>
+static T Max(T a, T b) {
+  return a < b ? b : a;
+}
+
+
+// Returns the minimum of the two parameters.
+template <typename T>
+static T Min(T a, T b) {
+  return a < b ? a : b;
+}
+
+
+inline int StrLength(const char* string) {
+  size_t length = strlen(string);
+  ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
+  return static_cast<int>(length);
+}
+
+// This is a simplified version of V8's Vector class.
+template <typename T>
+class Vector {
+ public:
+  Vector() : start_(NULL), length_(0) {}
+  Vector(T* data, int length) : start_(data), length_(length) {
+    ASSERT(length == 0 || (length > 0 && data != NULL));
+  }
+
+  // Returns a vector using the same backing storage as this one,
+  // spanning from and including 'from', to but not including 'to'.
+  Vector<T> SubVector(int from, int to) {
+    ASSERT(to <= length_);
+    ASSERT(from < to);
+    ASSERT(0 <= from);
+    return Vector<T>(start() + from, to - from);
+  }
+
+  // Returns the length of the vector.
+  int length() const { return length_; }
+
+  // Returns whether or not the vector is empty.
+  bool is_empty() const { return length_ == 0; }
+
+  // Returns the pointer to the start of the data in the vector.
+  T* start() const { return start_; }
+
+  // Access individual vector elements - checks bounds in debug mode.
+  T& operator[](int index) const {
+    ASSERT(0 <= index && index < length_);
+    return start_[index];
+  }
+
+  T& first() { return start_[0]; }
+
+  T& last() { return start_[length_ - 1]; }
+
+ private:
+  T* start_;
+  int length_;
+};
+
+
+// Helper class for building result strings in a character buffer. The
+// purpose of the class is to use safe operations that checks the
+// buffer bounds on all operations in debug mode.
+class StringBuilder {
+ public:
+  StringBuilder(char* buffer, int size)
+      : buffer_(buffer, size), position_(0) { }
+
+  ~StringBuilder() { if (!is_finalized()) Finalize(); }
+
+  int size() const { return buffer_.length(); }
+
+  // Get the current position in the builder.
+  int position() const {
+    ASSERT(!is_finalized());
+    return position_;
+  }
+
+  // Reset the position.
+  void Reset() { position_ = 0; }
+
+  // Add a single character to the builder. It is not allowed to add
+  // 0-characters; use the Finalize() method to terminate the string
+  // instead.
+  void AddCharacter(char c) {
+    ASSERT(c != '\0');
+    ASSERT(!is_finalized() && position_ < buffer_.length());
+    buffer_[position_++] = c;
+  }
+
+  // Add an entire string to the builder. Uses strlen() internally to
+  // compute the length of the input string.
+  void AddString(const char* s) {
+    AddSubstring(s, StrLength(s));
+  }
+
+  // Add the first 'n' characters of the given string 's' to the
+  // builder. The input string must have enough characters.
+  void AddSubstring(const char* s, int n) {
+    ASSERT(!is_finalized() && position_ + n < buffer_.length());
+    ASSERT(static_cast<size_t>(n) <= strlen(s));
+    memmove(&buffer_[position_], s, n * kCharSize);
+    position_ += n;
+  }
+
+
+  // Add character padding to the builder. If count is non-positive,
+  // nothing is added to the builder.
+  void AddPadding(char c, int count) {
+    for (int i = 0; i < count; i++) {
+      AddCharacter(c);
+    }
+  }
+
+  // Finalize the string by 0-terminating it and returning the buffer.
+  char* Finalize() {
+    ASSERT(!is_finalized() && position_ < buffer_.length());
+    buffer_[position_] = '\0';
+    // Make sure nobody managed to add a 0-character to the
+    // buffer while building the string.
+    ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
+    position_ = -1;
+    ASSERT(is_finalized());
+    return buffer_.start();
+  }
+
+ private:
+  Vector<char> buffer_;
+  int position_;
+
+  bool is_finalized() const { return position_ < 0; }
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
+};
+
+// The type-based aliasing rule allows the compiler to assume that pointers of
+// different types (for some definition of different) never alias each other.
+// Thus the following code does not work:
+//
+// float f = foo();
+// int fbits = *(int*)(&f);
+//
+// The compiler 'knows' that the int pointer can't refer to f since the types
+// don't match, so the compiler may cache f in a register, leaving random data
+// in fbits.  Using C++ style casts makes no difference, however a pointer to
+// char data is assumed to alias any other pointer.  This is the 'memcpy
+// exception'.
+//
+// Bit_cast uses the memcpy exception to move the bits from a variable of one
+// type of a variable of another type.  Of course the end result is likely to
+// be implementation dependent.  Most compilers (gcc-4.2 and MSVC 2005)
+// will completely optimize BitCast away.
+//
+// There is an additional use for BitCast.
+// Recent gccs will warn when they see casts that may result in breakage due to
+// the type-based aliasing rule.  If you have checked that there is no breakage
+// you can use BitCast to cast one pointer type to another.  This confuses gcc
+// enough that it can no longer see that you have cast one pointer type to
+// another thus avoiding the warning.
+template <class Dest, class Source>
+inline Dest BitCast(const Source& source) {
+  // Compile time assertion: sizeof(Dest) == sizeof(Source)
+  // A compile error here means your Dest and Source have different sizes.
+  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
+
+  Dest dest;
+  memmove(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+template <class Dest, class Source>
+inline Dest BitCast(Source* source) {
+  return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
+}
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_UTILS_H_
diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc
index 07b14e26..498ab5c5 100644
--- a/klm/util/ersatz_progress.cc
+++ b/klm/util/ersatz_progress.cc
@@ -9,36 +9,38 @@ namespace util {
 
 namespace { const unsigned char kWidth = 100; }
 
-ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<std::size_t>::max()), complete_(next_), out_(NULL) {}
+const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+
+ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
 
 ErsatzProgress::~ErsatzProgress() {
   if (out_) Finished();
 }
 
-ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message) 
+ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message) 
   : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
   if (!out_) {
-    next_ = std::numeric_limits<std::size_t>::max();
+    next_ = std::numeric_limits<uint64_t>::max();
     return;
   }
   if (!message.empty()) *out_ << message << '\n';
-  *out_ << "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+  *out_ << kProgressBanner;
 }
 
 void ErsatzProgress::Milestone() {
   if (!out_) { current_ = 0; return; }
   if (!complete_) return;
-  unsigned char stone = std::min(static_cast<std::size_t>(kWidth), (current_ * kWidth) / complete_);
+  unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
 
   for (; stones_written_ < stone; ++stones_written_) {
     (*out_) << '*';
   }
   if (stone == kWidth) {
     (*out_) << std::endl;
-    next_ = std::numeric_limits<std::size_t>::max();
+    next_ = std::numeric_limits<uint64_t>::max();
     out_ = NULL;
   } else {
-    next_ = std::max(next_, (stone * complete_) / kWidth);
+    next_ = std::max(next_, ((stone + 1) * complete_ + kWidth - 1) / kWidth);
   }
 }
 
diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh
index f709dc51..b94399a8 100644
--- a/klm/util/ersatz_progress.hh
+++ b/klm/util/ersatz_progress.hh
@@ -4,17 +4,22 @@
 #include <iostream>
 #include <string>
 
+#include <stdint.h>
+
 // Ersatz version of boost::progress so core language model doesn't depend on
 // boost.  Also adds option to print nothing.  
 
 namespace util {
+
+extern const char kProgressBanner[];
+
 class ErsatzProgress {
   public:
     // No output.  
     ErsatzProgress();
 
     // Null means no output.  The null value is useful for passing along the ostream pointer from another caller.   
-    explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
+    explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
 
     ~ErsatzProgress();
 
@@ -23,14 +28,13 @@ class ErsatzProgress {
       return *this;
     }
 
-    ErsatzProgress &operator+=(std::size_t amount) {
+    ErsatzProgress &operator+=(uint64_t amount) {
       if ((current_ += amount) >= next_) Milestone();
       return *this;
     }
 
-    void Set(std::size_t to) {
+    void Set(uint64_t to) {
       if ((current_ = to) >= next_) Milestone();
-      Milestone();
     }
 
     void Finished() {
@@ -40,7 +44,7 @@ class ErsatzProgress {
   private:
     void Milestone();
 
-    std::size_t current_, next_, complete_;
+    uint64_t current_, next_, complete_;
     unsigned char stones_written_;
     std::ostream *out_;
 
diff --git a/klm/util/exception.cc b/klm/util/exception.cc
index c4f8c04c..557c3986 100644
--- a/klm/util/exception.cc
+++ b/klm/util/exception.cc
@@ -79,9 +79,7 @@ ErrnoException::ErrnoException() throw() : errno_(errno) {
 
 ErrnoException::~ErrnoException() throw() {}
 
-EndOfFileException::EndOfFileException() throw() {
-  *this << "End of file";
-}
-EndOfFileException::~EndOfFileException() throw() {}
+OverflowException::OverflowException() throw() {}
+OverflowException::~OverflowException() throw() {}
 
 } // namespace util
diff --git a/klm/util/exception.hh b/klm/util/exception.hh
index 6d6a37cb..74046cf9 100644
--- a/klm/util/exception.hh
+++ b/klm/util/exception.hh
@@ -2,9 +2,12 @@
 #define UTIL_EXCEPTION__
 
 #include <exception>
+#include <limits>
 #include <sstream>
 #include <string>
 
+#include <stdint.h>
+
 namespace util {
 
 template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
@@ -41,7 +44,7 @@ class Exception : public std::exception {
 };
 
 /* This implements the normal operator<< for Exception and all its children. 
- * SNIFAE means it only applies to Exception.  Think of this as an ersatz
+ * SFINAE means it only applies to Exception.  Think of this as an ersatz
  * boost::enable_if.  
  */
 template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
@@ -59,40 +62,43 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
 #endif
 #endif
 
-#define UTIL_SET_LOCATION(UTIL_e, child, condition) do { \
-  (UTIL_e).SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, (child), (condition)); \
-} while (0)
-
 /* Create an instance of Exception, add the message Modify, and throw it.
  * Modify is appended to the what() message and can contain << for ostream
  * operations.  
  *
  * do .. while kludge to swallow trailing ; character
  * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .  
+ * Arg can be a constructor argument to the exception.
  */
-#define UTIL_THROW(Exception, Modify) do { \
-  Exception UTIL_e; \
-  UTIL_SET_LOCATION(UTIL_e, #Exception, NULL); \
+#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
+  Exception UTIL_e Arg; \
+  UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
   UTIL_e << Modify; \
   throw UTIL_e; \
 } while (0)
 
-#define UTIL_THROW_VAR(Var, Modify) do { \
-  Exception &UTIL_e = (Var); \
-  UTIL_SET_LOCATION(UTIL_e, NULL, NULL); \
-  UTIL_e << Modify; \
-  throw UTIL_e; \
-} while (0)
+#define UTIL_THROW_ARG(Exception, Arg, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
+
+#define UTIL_THROW(Exception, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, , Modify);
 
-#define UTIL_THROW_IF(Condition, Exception, Modify) do { \
-  if (Condition) { \
-    Exception UTIL_e; \
-    UTIL_SET_LOCATION(UTIL_e, #Exception, #Condition); \
-    UTIL_e << Modify; \
-    throw UTIL_e; \
+#if __GNUC__ >= 3
+#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
+#else
+#define UTIL_UNLIKELY(x) (x)
+#endif
+
+#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
+  if (UTIL_UNLIKELY(Condition)) { \
+    UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
   } \
 } while (0)
 
+#define UTIL_THROW_IF(Condition, Exception, Modify) \
+  UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
+
+// Exception that records errno and adds it to the message.
 class ErrnoException : public Exception {
   public:
     ErrnoException() throw();
@@ -105,12 +111,26 @@ class ErrnoException : public Exception {
     int errno_;
 };
 
-class EndOfFileException : public Exception {
+// Utilities for overflow checking.  
+class OverflowException : public Exception {
   public:
-    EndOfFileException() throw();
-    ~EndOfFileException() throw();
+    OverflowException() throw();
+    ~OverflowException() throw();
 };
 
+template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
+  UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected.  This model is too big for 32-bit code.");
+  return value;
+}
+
+template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
+  return value;
+}
+
+inline std::size_t CheckOverflow(uint64_t value) {
+  return CheckOverflowInternal<sizeof(std::size_t)>(value);
+}
+
 } // namespace util
 
 #endif // UTIL_EXCEPTION__
diff --git a/klm/util/file.cc b/klm/util/file.cc
index 98f13983..86d9b12d 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -1,11 +1,17 @@
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+
 #include "util/file.hh"
 
 #include "util/exception.hh"
 
 #include <cstdlib>
 #include <cstdio>
+#include <sstream>
 #include <iostream>
 
+#include <assert.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -14,6 +20,9 @@
 #if defined(_WIN32) || defined(_WIN64)
 #include <windows.h>
 #include <io.h>
+#include <algorithm>
+#include <limits.h>
+#include <limits>
 #else
 #include <unistd.h>
 #endif
@@ -34,6 +43,18 @@ scoped_FILE::~scoped_FILE() {
   }
 }
 
+// Note that ErrnoException records errno before NameFromFD is called.
+FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
+  *this << "in " << name_guess_ << ' ';
+}
+
+FDException::~FDException() throw() {}
+
+EndOfFileException::EndOfFileException() throw() {
+  *this << "End of file";
+}
+EndOfFileException::~EndOfFileException() throw() {}
+
 int OpenReadOrThrow(const char *name) {
   int ret;
 #if defined(_WIN32) || defined(_WIN64)
@@ -47,7 +68,7 @@ int OpenReadOrThrow(const char *name) {
 int CreateOrThrow(const char *name) {
   int ret;
 #if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
+  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
 #else
   UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
 #endif
@@ -58,31 +79,58 @@ uint64_t SizeFile(int fd) {
 #if defined(_WIN32) || defined(_WIN64)
   __int64 ret = _filelengthi64(fd);
   return (ret == -1) ? kBadSize : ret;
+#else // Not windows.
+
+#ifdef OS_ANDROID
+  struct stat64 sb;
+  int ret = fstat64(fd, &sb);
 #else
   struct stat sb;
-  if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
+  int ret = fstat(fd, &sb);
+#endif
+  if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
   return sb.st_size;
 #endif
 }
 
+uint64_t SizeOrThrow(int fd) {
+  uint64_t ret = SizeFile(fd);
+  UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
+  return ret;
+}
+
 void ResizeOrThrow(int fd, uint64_t to) {
 #if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW_IF(_chsize_s(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
+    errno_t ret = _chsize_s
+#elif defined(OS_ANDROID)
+    int ret = ftruncate64
 #else
-  UTIL_THROW_IF(ftruncate(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
+    int ret = ftruncate
 #endif
+    (fd, to);
+  UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
 }
 
-#ifdef WIN32
-typedef int ssize_t;
+std::size_t PartialRead(int fd, void *to, std::size_t amount) {
+#if defined(_WIN32) || defined(_WIN64)
+  amount = min(static_cast<std::size_t>(INT_MAX), amount);
+  int ret = _read(fd, to, amount); 
+#else
+  errno = 0;
+  ssize_t ret;
+  do {
+    ret = read(fd, to, amount);
+  } while (ret == -1 && errno == EINTR);
 #endif
+  UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
+  return static_cast<std::size_t>(ret);
+}
 
 void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
   uint8_t *to = static_cast<uint8_t*>(to_void);
   while (amount) {
-    ssize_t ret = read(fd, to, amount);
-    UTIL_THROW_IF(ret == -1, ErrnoException, "Reading " << amount << " from fd " << fd << " failed.");
-    UTIL_THROW_IF(ret == 0, EndOfFileException, "Hit EOF in fd " << fd << " but there should be " << amount << " more bytes to read.");
+    std::size_t ret = PartialRead(fd, to, amount);
+    UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
     amount -= ret;
     to += ret;
   }
@@ -92,8 +140,7 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
   uint8_t *to = static_cast<uint8_t*>(to_void);
   std::size_t remaining = amount;
   while (remaining) {
-    ssize_t ret = read(fd, to, remaining);
-    UTIL_THROW_IF(ret == -1, ErrnoException, "Reading " << remaining << " from fd " << fd << " failed.");
+    std::size_t ret = PartialRead(fd, to, remaining);
     if (!ret) return amount - remaining;
     remaining -= ret;
     to += ret;
@@ -101,26 +148,98 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
   return amount;
 }
 
+void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
+  uint8_t *to = static_cast<uint8_t*>(to_void);
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW(Exception, "This pread implementation for windows is broken.  Please send me a patch that does not change the file pointer.  Atomically.  Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
+  const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
+#endif
+  for (;size ;) {
+#if defined(_WIN32) || defined(_WIN64)
+    /* BROKEN: changes file pointer.  Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
+    // size_t might be 64-bit.  DWORD is always 32.
+    DWORD reading = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
+    DWORD ret;
+    OVERLAPPED overlapped;
+    memset(&overlapped, 0, sizeof(OVERLAPPED));
+    overlapped.Offset = static_cast<DWORD>(off);
+    overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
+    UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), Exception, "ReadFile failed for offset " << off);
+#else
+    ssize_t ret;
+    errno = 0;
+    do {
+#ifdef OS_ANDROID
+      ret = pread64(fd, to, size, off);
+#else
+      ret = pread(fd, to, size, off);
+#endif
+    } while (ret == -1 && errno == EINTR);
+    if (ret <= 0) {
+      UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
+      UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
+    }
+#endif
+    size -= ret;
+    off += ret;
+    to += ret;
+  }
+}
+
 void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
   const uint8_t *data = static_cast<const uint8_t*>(data_void);
   while (size) {
-    ssize_t ret = write(fd, data, size);
-    if (ret < 1) UTIL_THROW(util::ErrnoException, "Write failed");
+#if defined(_WIN32) || defined(_WIN64)
+    int ret = write(fd, data, min(static_cast<std::size_t>(INT_MAX), size));
+#else
+    errno = 0;
+    ssize_t ret;
+    do {
+      ret = write(fd, data, size);
+    } while (ret == -1 && errno == EINTR);
+#endif
+    UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
     data += ret;
     size -= ret;
   }
 }
 
+void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
+  if (!size) return;
+  UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
+}
+
 void FSyncOrThrow(int fd) {
 // Apparently windows doesn't have fsync?  
 #if !defined(_WIN32) && !defined(_WIN64)
-  UTIL_THROW_IF(-1 == fsync(fd), ErrnoException, "Sync of " << fd << " failed.");
+  UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
 #endif
 }
 
 namespace {
-void InternalSeek(int fd, off_t off, int whence) {
-  UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed");
+
+// Static assert for 64-bit off_t size.
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
+template <unsigned> struct CheckOffT;
+template <> struct CheckOffT<8> {
+  struct True {};
+};
+// If there's a compiler error on the next line, then off_t isn't 64 bit.  And
+// that makes me a sad panda.
+typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
+#endif
+
+// Can't we all just get along?  
+void InternalSeek(int fd, int64_t off, int whence) {
+  if (
+#if defined(_WIN32) || defined(_WIN64)
+    (__int64)-1 == _lseeki64(fd, off, whence)
+#elif defined(OS_ANDROID)
+    (off64_t)-1 == lseek64(fd, off, whence)
+#else
+    (off_t)-1 == lseek(fd, off, whence)
+#endif
+  ) UTIL_THROW_ARG(FDException, (fd), "while seeking to " << off << " whence " << whence);
 }
 } // namespace
 
@@ -138,13 +257,16 @@ void SeekEnd(int fd) {
 
 std::FILE *FDOpenOrThrow(scoped_fd &file) {
   std::FILE *ret = fdopen(file.get(), "r+b");
-  if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen");
+  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
   file.release();
   return ret;
 }
 
-TempMaker::TempMaker(const std::string &prefix) : base_(prefix) {
-  base_ += "XXXXXX";
+std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
+  std::FILE *ret = fdopen(file.get(), "rb");
+  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
+  file.release();
+  return ret;
 }
 
 // Sigh.  Windows temporary file creation is full of race conditions.
@@ -242,7 +364,9 @@ mkstemp_and_unlink(char *tmpl)
 
     /* Modified for windows and to unlink */
     //      fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
-    fd = _open (tmpl, _O_RDWR | _O_CREAT | _O_TEMPORARY | _O_EXCL | _O_BINARY, _S_IREAD | _S_IWRITE);
+    int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
+    flags |= _O_TEMPORARY;
+    fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
     if (fd >= 0)
     {
       errno = save_errno;
@@ -260,23 +384,94 @@ mkstemp_and_unlink(char *tmpl)
 int
 mkstemp_and_unlink(char *tmpl) {
   int ret = mkstemp(tmpl);
-  if (ret == -1) return -1;
-  UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
+  if (ret != -1) {
+    UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl);
+  }
   return ret;
 }
 #endif
 
-int TempMaker::Make() const {
-  std::string copy(base_);
-  copy.push_back(0);
+// If it's a directory, add a /.  This lets users say -T /tmp without creating
+// /tmpAAAAAA
+void NormalizeTempPrefix(std::string &base) {
+  if (base.empty()) return;
+  if (base[base.size() - 1] == '/') return;
+  struct stat sb;
+  // It's fine for it to not exist.
+  if (-1 == stat(base.c_str(), &sb)) return;
+  if (
+#if defined(_WIN32) || defined(_WIN64)
+    sb.st_mode & _S_IFDIR
+#else
+    S_ISDIR(sb.st_mode)
+#endif
+    ) base += '/';
+}
+
+int MakeTemp(const std::string &base) {
+  std::string name(base);
+  name += "XXXXXX";
+  name.push_back(0);
   int ret;
-  UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&copy[0])), util::ErrnoException, "Failed to make a temporary based on " << base_);
+  UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
   return ret;
 }
 
-std::FILE *TempMaker::MakeFile() const {
-  util::scoped_fd file(Make());
+std::FILE *FMakeTemp(const std::string &base) {
+  util::scoped_fd file(MakeTemp(base));
   return FDOpenOrThrow(file);
 }
 
+int DupOrThrow(int fd) {
+  int ret = dup(fd);
+  UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
+  return ret;
+}
+
+namespace {
+// Try to name things but be willing to fail too.
+bool TryName(int fd, std::string &out) {
+#if defined(_WIN32) || defined(_WIN64)
+  return false;
+#else
+  std::string name("/proc/self/fd/");
+  std::ostringstream convert;
+  convert << fd;
+  name += convert.str();
+  
+  struct stat sb;
+  if (-1 == lstat(name.c_str(), &sb)) 
+    return false;
+  out.resize(sb.st_size + 1);
+  ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
+  if (-1 == ret)
+    return false;
+  if (ret > sb.st_size) {
+    // Increased in size?!
+    return false;
+  }
+  out.resize(ret);
+  // Don't use the non-file names.
+  if (!out.empty() && out[0] != '/') 
+    return false;
+  return true;
+#endif
+}
+} // namespace
+
+std::string NameFromFD(int fd) {
+  std::string ret;
+  if (TryName(fd, ret)) return ret;
+  switch (fd) {
+    case 0: return "stdin";
+    case 1: return "stdout";
+    case 2: return "stderr";
+  }
+  ret = "fd ";
+  std::ostringstream convert;
+  convert << fd;
+  ret += convert.str();
+  return ret;
+}
+
 } // namespace util
diff --git a/klm/util/file.hh b/klm/util/file.hh
index 8af1ff4f..be88431d 100644
--- a/klm/util/file.hh
+++ b/klm/util/file.hh
@@ -1,6 +1,8 @@
 #ifndef UTIL_FILE__
 #define UTIL_FILE__
 
+#include "util/exception.hh"
+
 #include <cstddef>
 #include <cstdio>
 #include <string>
@@ -17,7 +19,7 @@ class scoped_fd {
 
     ~scoped_fd();
 
-    void reset(int to) {
+    void reset(int to = -1) {
       scoped_fd other(fd_);
       fd_ = to;
     }
@@ -32,8 +34,6 @@ class scoped_fd {
       return ret;
     }
 
-    operator bool() { return fd_ != -1; }
-
   private:
     int fd_;
 
@@ -65,6 +65,32 @@ class scoped_FILE {
     std::FILE *file_;
 };
 
+/* Thrown for any operation where the fd is known. */
+class FDException : public ErrnoException {
+  public:
+    explicit FDException(int fd) throw();
+
+    virtual ~FDException() throw();
+
+    // This may no longer be valid if the exception was thrown past open.
+    int FD() const { return fd_; }
+
+    // Guess from NameFromFD.
+    const std::string &NameGuess() const { return name_guess_; }
+
+  private:
+    int fd_;
+
+    std::string name_guess_;
+};
+
+// End of file reached.
+class EndOfFileException : public Exception {
+  public:
+    EndOfFileException() throw();
+    ~EndOfFileException() throw();
+};
+
 // Open for read only.  
 int OpenReadOrThrow(const char *name);
 // Create file if it doesn't exist, truncate if it does.  Opened for write.   
@@ -73,13 +99,18 @@ int CreateOrThrow(const char *name);
 // Return value for SizeFile when it can't size properly.  
 const uint64_t kBadSize = (uint64_t)-1;
 uint64_t SizeFile(int fd);
+uint64_t SizeOrThrow(int fd);
 
 void ResizeOrThrow(int fd, uint64_t to);
 
+std::size_t PartialRead(int fd, void *to, std::size_t size);
 void ReadOrThrow(int fd, void *to, std::size_t size);
-std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount);
+std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
+// Positioned: unix only for now.  
+void PReadOrThrow(int fd, void *to, std::size_t size, uint64_t off);
 
 void WriteOrThrow(int fd, const void *data_void, std::size_t size);
+void WriteOrThrow(FILE *to, const void *data, std::size_t size);
 
 void FSyncOrThrow(int fd);
 
@@ -89,21 +120,22 @@ void AdvanceOrThrow(int fd, int64_t off);
 void SeekEnd(int fd);
 
 std::FILE *FDOpenOrThrow(scoped_fd &file);
-
-class TempMaker {
-  public:
-    explicit TempMaker(const std::string &prefix);
-
-    // These will already be unlinked for you.  
-    int Make() const;
-    std::FILE *MakeFile() const;
-
-    // This will force you to close the fd instead of leaving it open.  
-    std::string Name(scoped_fd &opened) const;
-
-  private:
-    std::string base_;
-};
+std::FILE *FDOpenReadOrThrow(scoped_fd &file);
+
+// Temporary files
+// Append a / if base is a directory.
+void NormalizeTempPrefix(std::string &base);
+int MakeTemp(const std::string &prefix);
+std::FILE *FMakeTemp(const std::string &prefix);
+
+// dup an fd.
+int DupOrThrow(int fd);
+
+/* Attempt get file name from fd.  This won't always work (i.e. on Windows or
+ * a pipe).  The file might have been renamed.  It's intended for diagnostics
+ * and logging only.
+ */
+std::string NameFromFD(int fd);
 
 } // namespace util
 
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index af341d6d..9de30fc4 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -1,19 +1,21 @@
 #include "util/file_piece.hh"
 
+#include "util/double-conversion/double-conversion.h"
 #include "util/exception.hh"
 #include "util/file.hh"
 #include "util/mmap.hh"
-#ifdef WIN32
+
+#if defined(_WIN32) || defined(_WIN64)
 #include <io.h>
-#endif // WIN32
+#else
+#include <unistd.h>
+#endif
 
 #include <iostream>
 #include <string>
 #include <limits>
 
-#include <unistd.h>
 #include <assert.h>
-#include <ctype.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -25,13 +27,6 @@ ParseNumberException::ParseNumberException(StringPiece value) throw() {
   *this << "Could not parse \"" << value << "\" into a number";
 }
 
-#ifdef HAVE_ZLIB
-GZException::GZException(gzFile file) {
-  int num;
-  *this << gzerror(file, &num) << " from zlib";
-}
-#endif // HAVE_ZLIB
-
 // Sigh this is the only way I could come up with to do a _const_ bool.  It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale). 
 const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
@@ -41,26 +36,33 @@ FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t
   Initialize(name, show_progress, min_buffer);
 }
 
-FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer)  : 
+namespace {
+std::string NamePossiblyFind(int fd, const char *name) {
+  if (name) return name;
+  return NameFromFD(fd);
+}
+} // namespace
+
+FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : 
   file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
-  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
-  Initialize(name, show_progress, min_buffer);
+  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
+  Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
 }
 
-FilePiece::~FilePiece() {
-#ifdef HAVE_ZLIB
-  if (gz_file_) {
-    // zlib took ownership
-    file_.release();
-    int ret;
-    if (Z_OK != (ret = gzclose(gz_file_))) {
-      std::cerr << "could not close file " << file_name_ << " using zlib" << std::endl;
-      abort();
-    }
-  }
-#endif
+FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
+  total_size_(kBadSize), page_(SizePage()) {
+  InitializeNoRead(name ? name : "istream", min_buffer);
+
+  fallback_to_read_ = true;
+  data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
+  position_ = data_.begin();
+  position_end_ = position_;
+  
+  fell_back_.Reset(stream);
 }
 
+FilePiece::~FilePiece() {}
+
 StringPiece FilePiece::ReadLine(char delim) {
   std::size_t skip = 0;
   while (true) {
@@ -93,10 +95,34 @@ unsigned long int FilePiece::ReadULong() {
   return ReadNumber<unsigned long int>();
 }
 
-void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer)  {
-#ifdef HAVE_ZLIB
-  gz_file_ = NULL;
-#endif
+std::size_t FilePiece::Raw(void *to, std::size_t limit) {
+  if (!limit) return 0;
+  std::size_t in_buf = static_cast<std::size_t>(position_end_ - position_);
+  if (in_buf) {
+    std::size_t amount = std::min(in_buf, limit);
+    memcpy(to, position_, amount);
+    position_ += amount;
+    return amount;
+  }
+
+  std::size_t read_return;
+  if (fallback_to_read_) {
+    read_return = fell_back_.Read(to, limit);
+    progress_.Set(fell_back_.RawAmount());
+  } else {
+    uint64_t desired_begin = mapped_offset_ + static_cast<uint64_t>(position_ - data_.begin());
+    SeekOrThrow(file_.get(), desired_begin);
+    read_return = ReadOrEOF(file_.get(), to, limit);
+    // Good thing we never rewind.  This makes desired_begin calculate the right way the next time.
+    mapped_offset_ += static_cast<uint64_t>(read_return);
+    progress_ += read_return;
+  }
+  at_end_ |= (read_return == 0);
+  return read_return;
+}
+
+// Factored out so that istream can call this.
+void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
   file_name_ = name;
 
   default_map_size_ = page_ * std::max<std::size_t>((min_buffer / page_ + 1), 2);
@@ -104,6 +130,10 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
   position_end_ = NULL;
   mapped_offset_ = 0;
   at_end_ = false;
+}
+
+void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
+  InitializeNoRead(name, min_buffer);
 
   if (total_size_ == kBadSize) {
     // So the assertion passes.  
@@ -116,10 +146,7 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
   }
   Shift();
   // gzip detect.
-  if ((position_end_ - position_) > 2 && *position_ == 0x1f && static_cast<unsigned char>(*(position_ + 1)) == 0x8b) {
-#ifndef HAVE_ZLIB
-    UTIL_THROW(GZException, "Looks like a gzip file but support was not compiled in.");
-#endif
+  if ((position_end_ - position_) >= ReadCompressed::kMagicSize && ReadCompressed::DetectCompressedMagic(position_)) {
     if (!fallback_to_read_) {
       at_end_ = false;
       TransitionToRead();
@@ -128,21 +155,33 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
 }
 
 namespace {
-void ParseNumber(const char *begin, char *&end, float &out) {
-#if defined(sun) || defined(WIN32)
-  out = static_cast<float>(strtod(begin, &end));
-#else
-  out = strtof(begin, &end);
-#endif
+
+static const double_conversion::StringToDoubleConverter kConverter(
+    double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
+    std::numeric_limits<double>::quiet_NaN(),
+    std::numeric_limits<double>::quiet_NaN(),
+    "inf",
+    "NaN");
+
+void ParseNumber(const char *begin, const char *&end, float &out) {
+  int count;
+  out = kConverter.StringToFloat(begin, end - begin, &count);
+  end = begin + count;
 }
-void ParseNumber(const char *begin, char *&end, double &out) {
-  out = strtod(begin, &end);
+void ParseNumber(const char *begin, const char *&end, double &out) {
+  int count;
+  out = kConverter.StringToDouble(begin, end - begin, &count);
+  end = begin + count;
 }
-void ParseNumber(const char *begin, char *&end, long int &out) {
-  out = strtol(begin, &end, 10);
+void ParseNumber(const char *begin, const char *&end, long int &out) {
+  char *silly_end;
+  out = strtol(begin, &silly_end, 10);
+  end = silly_end;
 }
-void ParseNumber(const char *begin, char *&end, unsigned long int &out) {
-  out = strtoul(begin, &end, 10);
+void ParseNumber(const char *begin, const char *&end, unsigned long int &out) {
+  char *silly_end;
+  out = strtoul(begin, &silly_end, 10);
+  end = silly_end;
 }
 } // namespace
 
@@ -152,16 +191,17 @@ template <class T> T FilePiece::ReadNumber() {
     if (at_end_) {
       // Hallucinate a null off the end of the file.
       std::string buffer(position_, position_end_);
-      char *end;
+      const char *buf = buffer.c_str();
+      const char *end = buf + buffer.size();
       T ret;
-      ParseNumber(buffer.c_str(), end, ret);
-      if (buffer.c_str() == end) throw ParseNumberException(buffer);
-      position_ += end - buffer.c_str();
+      ParseNumber(buf, end, ret);
+      if (buf == end) throw ParseNumberException(buffer);
+      position_ += end - buf;
       return ret;
     }
     Shift();
   }
-  char *end;
+  const char *end = last_space_;
   T ret;
   ParseNumber(position_, end, ret);
   if (end == position_) throw ParseNumberException(ReadDelimited());
@@ -196,7 +236,7 @@ void FilePiece::Shift() {
   if (fallback_to_read_) ReadShift();
 
   for (last_space_ = position_end_ - 1; last_space_ >= position_; --last_space_) {
-    if (isspace(*last_space_))  break;
+    if (kSpaces[static_cast<unsigned char>(*last_space_)])  break;
   }
 }
 
@@ -242,22 +282,18 @@ void FilePiece::TransitionToRead() {
   assert(!fallback_to_read_);
   fallback_to_read_ = true;
   data_.reset();
-  data_.reset(malloc(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
-  UTIL_THROW_IF(!data_.get(), ErrnoException, "malloc failed for " << default_map_size_);
+  data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
   position_ = data_.begin();
   position_end_ = position_;
 
-#ifdef HAVE_ZLIB
-  assert(!gz_file_);
-  gz_file_ = gzdopen(file_.get(), "r");
-  UTIL_THROW_IF(!gz_file_, GZException, "zlib failed to open " << file_name_);
-#endif
+  try {
+    fell_back_.Reset(file_.release());
+  } catch (util::Exception &e) {
+    e << " in file " << file_name_;
+    throw;
+  }
 }
 
-#ifdef WIN32
-typedef int ssize_t;
-#endif
-
 void FilePiece::ReadShift() {
   assert(fallback_to_read_);
   // Bytes [data_.begin(), position_) have been consumed.  
@@ -282,7 +318,7 @@ void FilePiece::ReadShift() {
       position_ = data_.begin();
       position_end_ = position_ + valid_length;
     } else {
-      size_t moving = position_end_ - position_;
+      std::size_t moving = position_end_ - position_;
       memmove(data_.get(), position_, moving);
       position_ = data_.begin();
       position_end_ = position_ + moving;
@@ -290,20 +326,9 @@ void FilePiece::ReadShift() {
     }
   }
 
-  ssize_t read_return;
-#ifdef HAVE_ZLIB
-  read_return = gzread(gz_file_, static_cast<char*>(data_.get()) + already_read, default_map_size_ - already_read);
-  if (read_return == -1) throw GZException(gz_file_);
-  if (total_size_ != kBadSize) {
-    // Just get the position, don't actually seek.  Apparently this is how you do it. . . 
-    off_t ret = lseek(file_.get(), 0, SEEK_CUR);
-    if (ret != -1) progress_.Set(ret);
-  }
-#else
-  read_return = read(file_.get(), static_cast<char*>(data_.get()) + already_read, default_map_size_ - already_read);
-  UTIL_THROW_IF(read_return == -1, ErrnoException, "read failed");
-  progress_.Set(mapped_offset_);
-#endif
+  std::size_t read_return = fell_back_.Read(static_cast<uint8_t*>(data_.get()) + already_read, default_map_size_ - already_read);
+  progress_.Set(fell_back_.RawAmount());
+
   if (read_return == 0) {
     at_end_ = true;
   }
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index af93d8aa..1b110287 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -4,19 +4,16 @@
 #include "util/ersatz_progress.hh"
 #include "util/exception.hh"
 #include "util/file.hh"
-#include "util/have.hh"
 #include "util/mmap.hh"
+#include "util/read_compressed.hh"
 #include "util/string_piece.hh"
 
 #include <cstddef>
+#include <iosfwd>
 #include <string>
 
 #include <stdint.h>
 
-#ifdef HAVE_ZLIB
-#include <zlib.h>
-#endif
-
 namespace util {
 
 class ParseNumberException : public Exception {
@@ -25,28 +22,26 @@ class ParseNumberException : public Exception {
     ~ParseNumberException() throw() {}
 };
 
-class GZException : public Exception {
-  public:
-#ifdef HAVE_ZLIB
-    explicit GZException(gzFile file);
-#endif
-    GZException() throw() {}
-    ~GZException() throw() {}
-};
-
 extern const bool kSpaces[256];
 
-// Memory backing the returned StringPiece may vanish on the next call.  
+// Memory backing the returned StringPiece may vanish on the next call.
 class FilePiece {
   public:
-    // 32 MB default.
-    explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432);
-    // Takes ownership of fd.  name is used for messages.  
-    explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432);
+    // 1 MB default.
+    explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+    // Takes ownership of fd.  name is used for messages.
+    explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+
+    /* Read from an istream.  Don't use this if you can avoid it.  Raw fd IO is
+     * much faster.  But sometimes you just have an istream like Boost's HTTP
+     * server and want to parse it the same way.
+     * name is just used for messages and FileName().
+     */
+    explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
 
     ~FilePiece();
-     
-    char get() { 
+
+    char get() {
       if (position_ == position_end_) {
         Shift();
         if (at_end_) throw EndOfFileException();
@@ -54,14 +49,14 @@ class FilePiece {
       return *(position_++);
     }
 
-    // Leaves the delimiter, if any, to be returned by get().  Delimiters defined by isspace().  
+    // Leaves the delimiter, if any, to be returned by get().  Delimiters defined by isspace().
     StringPiece ReadDelimited(const bool *delim = kSpaces) {
       SkipSpaces(delim);
       return Consume(FindDelimiterOrEOF(delim));
     }
 
     // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
-    // It is similar to getline in that way.  
+    // It is similar to getline in that way.
     StringPiece ReadLine(char delim = '\n');
 
     float ReadFloat();
@@ -69,7 +64,10 @@ class FilePiece {
     long int ReadLong();
     unsigned long int ReadULong();
 
-    // Skip spaces defined by isspace.  
+    // Fake read() function.  Reads up to limit bytes, returning the amount read.  Returns 0 on EOF || limit == 0. 
+    std::size_t Raw(void *to, std::size_t limit);
+
+    // Skip spaces defined by being in delim.
     void SkipSpaces(const bool *delim = kSpaces) {
       for (; ; ++position_) {
         if (position_ == position_end_) Shift();
@@ -82,8 +80,10 @@ class FilePiece {
     }
 
     const std::string &FileName() const { return file_name_; }
-    
+
   private:
+    void InitializeNoRead(const char *name, std::size_t min_buffer);
+    // Calls InitializeNoRead, so don't call both.
     void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
 
     template <class T> T ReadNumber();
@@ -122,9 +122,7 @@ class FilePiece {
 
     std::string file_name_;
 
-#ifdef HAVE_ZLIB
-    gzFile gz_file_;
-#endif // HAVE_ZLIB
+    ReadCompressed fell_back_;
 };
 
 } // namespace util
diff --git a/klm/util/file_piece_test.cc b/klm/util/file_piece_test.cc
index f912e18a..7336007d 100644
--- a/klm/util/file_piece_test.cc
+++ b/klm/util/file_piece_test.cc
@@ -1,6 +1,7 @@
 // Tests might fail if you have creative characters in your path.  Sue me.  
 #include "util/file_piece.hh"
 
+#include "util/file.hh"
 #include "util/scoped.hh"
 
 #define BOOST_TEST_MODULE FilePieceTest
@@ -23,6 +24,20 @@ std::string FileLocation() {
   return ret;
 }
 
+/* istream */
+BOOST_AUTO_TEST_CASE(IStream) {
+  std::fstream ref(FileLocation().c_str(), std::ios::in);
+  std::fstream backing(FileLocation().c_str(), std::ios::in);
+  FilePiece test(backing);
+  std::string ref_line;
+  while (getline(ref, ref_line)) {
+    StringPiece test_line(test.ReadLine());
+    BOOST_CHECK_EQUAL(ref_line, test_line);
+  }
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+  BOOST_CHECK_THROW(test.get(), EndOfFileException);
+}
+
 /* mmap implementation */
 BOOST_AUTO_TEST_CASE(MMapReadLine) {
   std::fstream ref(FileLocation().c_str(), std::ios::in);
@@ -38,7 +53,7 @@ BOOST_AUTO_TEST_CASE(MMapReadLine) {
   BOOST_CHECK_THROW(test.get(), EndOfFileException);
 }
 
-#ifndef __APPLE__
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
 /* Apple isn't happy with the popen, fileno, dup.  And I don't want to
  * reimplement popen.  This is an issue with the test.  
  */
@@ -65,7 +80,7 @@ BOOST_AUTO_TEST_CASE(StreamReadLine) {
   BOOST_CHECK_THROW(test.get(), EndOfFileException);
   BOOST_REQUIRE(!pclose(catter));
 }
-#endif // __APPLE__
+#endif
 
 #ifdef HAVE_ZLIB
 
diff --git a/klm/util/have.hh b/klm/util/have.hh
index 1d76a7fc..6e18529d 100644
--- a/klm/util/have.hh
+++ b/klm/util/have.hh
@@ -2,22 +2,12 @@
 #ifndef UTIL_HAVE__
 #define UTIL_HAVE__
 
-#ifndef HAVE_ZLIB
-#if !defined(_WIN32) && !defined(_WIN64)
-#define HAVE_ZLIB
-#endif
+#ifdef HAVE_CONFIG_H
+#include "config.h"
 #endif
 
 #ifndef HAVE_ICU
 //#define HAVE_ICU
 #endif
 
-#ifndef HAVE_BOOST
-//#define HAVE_BOOST
-#endif
-
-#ifndef HAVE_THREADS
-//#define HAVE_THREADS
-#endif
-
 #endif // UTIL_HAVE__
diff --git a/klm/util/joint_sort.hh b/klm/util/joint_sort.hh
index cf3d8432..1b43ddcf 100644
--- a/klm/util/joint_sort.hh
+++ b/klm/util/joint_sort.hh
@@ -60,7 +60,7 @@ template <class KeyIter, class ValueIter> class JointProxy {
     JointProxy(const KeyIter &key_iter, const ValueIter &value_iter) : inner_(key_iter, value_iter) {}
     JointProxy(const JointProxy<KeyIter, ValueIter> &other) : inner_(other.inner_) {}
 
-    operator const value_type() const {
+    operator value_type() const {
       value_type ret;
       ret.key = *inner_.key_;
       ret.value = *inner_.value_;
@@ -121,7 +121,7 @@ template <class Proxy, class Less> class LessWrapper : public std::binary_functi
 
 template <class KeyIter, class ValueIter> class PairedIterator : public ProxyIterator<detail::JointProxy<KeyIter, ValueIter> > {
   public:
-    PairedIterator(const KeyIter &key, const ValueIter &value) : 
+    PairedIterator(const KeyIter &key, const ValueIter &value) :
       ProxyIterator<detail::JointProxy<KeyIter, ValueIter> >(detail::JointProxy<KeyIter, ValueIter>(key, value)) {}
 };
 
diff --git a/klm/util/multi_intersection.hh b/klm/util/multi_intersection.hh
new file mode 100644
index 00000000..8334d39d
--- /dev/null
+++ b/klm/util/multi_intersection.hh
@@ -0,0 +1,80 @@
+#ifndef UTIL_MULTI_INTERSECTION__
+#define UTIL_MULTI_INTERSECTION__
+
+#include <boost/optional.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+namespace util {
+
+namespace detail {
+template <class Range> struct RangeLessBySize : public std::binary_function<const Range &, const Range &, bool> {
+  bool operator()(const Range &left, const Range &right) const {
+    return left.size() < right.size();
+  }
+};
+
+/* Takes sets specified by their iterators and a boost::optional containing
+ * the lowest intersection if any.  Each set must be sorted in increasing
+ * order.  sets is changed to truncate the beginning of each sequence to the
+ * location of the match or an empty set.  Precondition: sets is not empty
+ * since the intersection over null is the universe and this function does not
+ * know the universe.   
+ */
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
+  typedef std::vector<boost::iterator_range<Iterator> > Sets;
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+
+  assert(!sets.empty());
+
+  if (sets.front().empty()) return boost::optional<Value>();
+  // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.  
+  Value highest(sets.front().front());
+  for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
+    i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
+    if (i->empty()) return boost::optional<Value>();
+    if (less(highest, i->front())) {
+      highest = i->front();
+      // start over
+      i = sets.begin();
+    } else {
+      ++i;
+    }
+  }
+  return boost::optional<Value>(highest);
+}
+
+} // namespace detail
+
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
+  assert(!sets.empty());
+
+  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+  return detail::FirstIntersectionSorted(sets, less);
+}
+
+template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
+  return FirstIntersection(sets, std::less<typename std::iterator_traits<Iterator>::value_type>());
+}
+
+template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  assert(!sets.empty());
+
+  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+  boost::optional<Value> ret;
+  for (boost::optional<Value> ret; ret = detail::FirstIntersectionSorted(sets, less); sets.front().advance_begin(1)) {
+    out(*ret);
+  }
+}
+
+template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
+  AllIntersection(sets, out, std::less<typename std::iterator_traits<Iterator>::value_type>());
+}
+
+} // namespace util
+
+#endif // UTIL_MULTI_INTERSECTION__
diff --git a/klm/util/multi_intersection_test.cc b/klm/util/multi_intersection_test.cc
new file mode 100644
index 00000000..970afc17
--- /dev/null
+++ b/klm/util/multi_intersection_test.cc
@@ -0,0 +1,63 @@
+#include "util/multi_intersection.hh"
+
+#define BOOST_TEST_MODULE MultiIntersectionTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace {
+
+BOOST_AUTO_TEST_CASE(Empty) {
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  
+  sets.push_back(boost::iterator_range<const unsigned int*>(static_cast<const unsigned int*>(NULL), static_cast<const unsigned int*>(NULL)));
+  BOOST_CHECK(!FirstIntersection(sets));
+}
+
+BOOST_AUTO_TEST_CASE(Single) {
+  std::vector<unsigned int> nums;
+  nums.push_back(1);
+  nums.push_back(4);
+  nums.push_back(100);
+  std::vector<boost::iterator_range<std::vector<unsigned int>::const_iterator> > sets;
+  sets.push_back(nums);
+
+  boost::optional<unsigned int> ret(FirstIntersection(sets));
+
+  BOOST_REQUIRE(ret);
+  BOOST_CHECK_EQUAL(static_cast<unsigned int>(1), *ret);
+}
+
+template <class T, unsigned int len> boost::iterator_range<const T*> RangeFromArray(const T (&arr)[len]) {
+  return boost::iterator_range<const T*>(arr, arr + len);
+}
+
+BOOST_AUTO_TEST_CASE(MultiNone) {
+  unsigned int nums0[] = {1, 3, 4, 22};
+  unsigned int nums1[] = {2, 5, 12};
+  unsigned int nums2[] = {4, 17};
+
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  sets.push_back(RangeFromArray(nums0));
+  sets.push_back(RangeFromArray(nums1));
+  sets.push_back(RangeFromArray(nums2));
+
+  BOOST_CHECK(!FirstIntersection(sets));
+}
+
+BOOST_AUTO_TEST_CASE(MultiOne) {
+  unsigned int nums0[] = {1, 3, 4, 17, 22};
+  unsigned int nums1[] = {2, 5, 12, 17};
+  unsigned int nums2[] = {4, 17};
+
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  sets.push_back(RangeFromArray(nums0));
+  sets.push_back(RangeFromArray(nums1));
+  sets.push_back(RangeFromArray(nums2));
+
+  boost::optional<unsigned int> ret(FirstIntersection(sets));
+  BOOST_REQUIRE(ret);
+  BOOST_CHECK_EQUAL(static_cast<unsigned int>(17), *ret);
+}
+
+} // namespace
+} // namespace util
diff --git a/klm/util/pcqueue.hh b/klm/util/pcqueue.hh
new file mode 100644
index 00000000..3df8749b
--- /dev/null
+++ b/klm/util/pcqueue.hh
@@ -0,0 +1,105 @@
+#ifndef UTIL_PCQUEUE__
+#define UTIL_PCQUEUE__
+
+#include <boost/interprocess/sync/interprocess_semaphore.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/utility.hpp>
+
+#include <errno.h>
+
+namespace util {
+
+inline void WaitSemaphore (boost::interprocess::interprocess_semaphore &on) {
+  while (1) {
+    try {
+      on.wait();
+      break;
+    }
+    catch (boost::interprocess::interprocess_exception &e) {
+      if (e.get_native_error() != EINTR) throw;
+    }
+  }
+}
+
+/* Producer consumer queue safe for multiple producers and multiple consumers.
+ * T must be default constructable and have operator=.  
+ * The value is copied twice for Consume(T &out) or three times for Consume(),
+ * so larger objects should be passed via pointer.
+ * Strong exception guarantee if operator= throws.  Undefined if semaphores throw.  
+ */
+template <class T> class PCQueue : boost::noncopyable {
+ public:
+  explicit PCQueue(size_t size)
+   : empty_(size), used_(0),
+     storage_(new T[size]),
+     end_(storage_.get() + size),
+     produce_at_(storage_.get()),
+     consume_at_(storage_.get()) {}
+
+  // Add a value to the queue.
+  void Produce(const T &val) {
+    WaitSemaphore(empty_);
+    {
+      boost::unique_lock<boost::mutex> produce_lock(produce_at_mutex_);
+      try {
+        *produce_at_ = val;
+      }
+      catch (...) {
+        empty_.post();
+        throw;
+      }
+      if (++produce_at_ == end_) produce_at_ = storage_.get();
+    }
+    used_.post();
+  }
+
+  // Consume a value, assigning it to out.
+  T& Consume(T &out) {
+    WaitSemaphore(used_);
+    {
+      boost::unique_lock<boost::mutex> consume_lock(consume_at_mutex_);
+      try {
+        out = *consume_at_;
+      }
+      catch (...) {
+        used_.post();
+        throw;
+      }
+      if (++consume_at_ == end_) consume_at_ = storage_.get();
+    }
+    empty_.post();
+    return out;
+  }
+
+  // Convenience version of Consume that copies the value to return.
+  // The other version is faster.
+  T Consume() {
+    T ret;
+    Consume(ret);
+    return ret;
+  }
+   
+ private:
+  // Number of empty spaces in storage_.
+  boost::interprocess::interprocess_semaphore empty_;
+  // Number of occupied spaces in storage_.
+  boost::interprocess::interprocess_semaphore used_;
+
+  boost::scoped_array<T> storage_;
+
+  T *const end_;
+
+  // Index for next write in storage_.
+  T *produce_at_;
+  boost::mutex produce_at_mutex_;
+
+  // Index for next read from storage_.
+  T *consume_at_;
+  boost::mutex consume_at_mutex_;
+
+};
+
+} // namespace util
+
+#endif // UTIL_PCQUEUE__
diff --git a/klm/util/pool.cc b/klm/util/pool.cc
new file mode 100644
index 00000000..429ba158
--- /dev/null
+++ b/klm/util/pool.cc
@@ -0,0 +1,36 @@
+#include "util/pool.hh"
+
+#include "util/scoped.hh"
+
+#include <stdlib.h>
+
+namespace util {
+
+Pool::Pool() {
+  current_ = NULL;
+  current_end_ = NULL;
+}
+
+Pool::~Pool() {
+  FreeAll();
+}
+
+void Pool::FreeAll() {
+  for (std::vector<void *>::const_iterator i(free_list_.begin()); i != free_list_.end(); ++i) {
+    free(*i);
+  }
+  free_list_.clear();
+  current_ = NULL;
+  current_end_ = NULL;
+}
+
+void *Pool::More(std::size_t size) {
+  std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
+  uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
+  free_list_.push_back(ret);
+  current_ = ret + size;
+  current_end_ = ret + amount;
+  return ret;
+}
+
+} // namespace util
diff --git a/klm/util/pool.hh b/klm/util/pool.hh
new file mode 100644
index 00000000..72f8a0c8
--- /dev/null
+++ b/klm/util/pool.hh
@@ -0,0 +1,45 @@
+// Very simple pool.  It can only allocate memory.  And all of the memory it
+// allocates must be freed at the same time.  
+
+#ifndef UTIL_POOL__
+#define UTIL_POOL__
+
+#include <vector>
+
+#include <stdint.h>
+
+namespace util {
+
+class Pool {
+  public:
+    Pool();
+
+    ~Pool();
+
+    void *Allocate(std::size_t size) {
+      void *ret = current_;
+      current_ += size;
+      if (current_ < current_end_) {
+        return ret;
+      } else {
+        return More(size);
+      }
+    }
+
+    void FreeAll();
+
+  private:
+    void *More(std::size_t size);
+
+    std::vector<void *> free_list_;
+
+    uint8_t *current_, *current_end_;
+
+    // no copying
+    Pool(const Pool &);
+    Pool &operator=(const Pool &);
+}; 
+
+} // namespace util
+
+#endif // UTIL_POOL__
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 3354b68e..6780489d 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -8,6 +8,7 @@
 #include <functional>
 
 #include <assert.h>
+#include <stdint.h>
 
 namespace util {
 
@@ -42,8 +43,8 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
     typedef EqualT Equal;
 
   public:
-    static std::size_t Size(std::size_t entries, float multiplier) {
-      std::size_t buckets = std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries)));
+    static uint64_t Size(uint64_t entries, float multiplier) {
+      uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries)));
       return buckets * sizeof(Entry);
     }
 
@@ -125,6 +126,11 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
       }    
     }
 
+    void Clear(Entry invalid) {
+      std::fill(begin_, end_, invalid);
+      entries_ = 0;
+    }
+
   private:
     MutableIterator begin_;
     std::size_t buckets_;
diff --git a/klm/util/read_compressed.cc b/klm/util/read_compressed.cc
new file mode 100644
index 00000000..b81549e4
--- /dev/null
+++ b/klm/util/read_compressed.cc
@@ -0,0 +1,429 @@
+#include "util/read_compressed.hh"
+
+#include "util/file.hh"
+#include "util/have.hh"
+#include "util/scoped.hh"
+
+#include <algorithm>
+#include <iostream>
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+#endif
+
+#ifdef HAVE_BZLIB
+#include <bzlib.h>
+#endif
+
+#ifdef HAVE_XZLIB
+#include <lzma.h>
+#endif
+
+namespace util {
+
+CompressedException::CompressedException() throw() {}
+CompressedException::~CompressedException() throw() {}
+
+GZException::GZException() throw() {}
+GZException::~GZException() throw() {}
+
+BZException::BZException() throw() {}
+BZException::~BZException() throw() {}
+
+XZException::XZException() throw() {}
+XZException::~XZException() throw() {}
+
+class ReadBase {
+  public:
+    virtual ~ReadBase() {}
+
+    virtual std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) = 0;
+
+  protected:
+    static void ReplaceThis(ReadBase *with, ReadCompressed &thunk) {
+      thunk.internal_.reset(with);
+    }
+
+    static uint64_t &ReadCount(ReadCompressed &thunk) {
+      return thunk.raw_amount_;
+    }
+};
+
+namespace {
+
+// Completed file that other classes can thunk to.  
+class Complete : public ReadBase {
+  public:
+    std::size_t Read(void *, std::size_t, ReadCompressed &) {
+      return 0;
+    }
+};
+
+class Uncompressed : public ReadBase {
+  public:
+    explicit Uncompressed(int fd) : fd_(fd) {}
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      std::size_t got = PartialRead(fd_.get(), to, amount);
+      ReadCount(thunk) += got;
+      return got;
+    }
+
+  private:
+    scoped_fd fd_;
+};
+
+class UncompressedWithHeader : public ReadBase {
+  public:
+    UncompressedWithHeader(int fd, void *already_data, std::size_t already_size) : fd_(fd) {
+      assert(already_size);
+      buf_.reset(malloc(already_size));
+      if (!buf_.get()) throw std::bad_alloc();
+      memcpy(buf_.get(), already_data, already_size);
+      remain_ = static_cast<uint8_t*>(buf_.get());
+      end_ = remain_ + already_size;
+    }
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      assert(buf_.get());
+      std::size_t sending = std::min<std::size_t>(amount, end_ - remain_);
+      memcpy(to, remain_, sending);
+      remain_ += sending;
+      if (remain_ == end_) {
+        ReplaceThis(new Uncompressed(fd_.release()), thunk);
+      }
+      return sending;
+    }
+
+  private:
+    scoped_malloc buf_;
+    uint8_t *remain_;
+    uint8_t *end_;
+
+    scoped_fd fd_;
+};
+
+#ifdef HAVE_ZLIB
+class GZip : public ReadBase {
+  private:
+    static const std::size_t kInputBuffer = 16384;
+  public:
+    GZip(int fd, void *already_data, std::size_t already_size) 
+      : file_(fd), in_buffer_(malloc(kInputBuffer)) {
+      if (!in_buffer_.get()) throw std::bad_alloc();
+      assert(already_size < kInputBuffer);
+      if (already_size) {
+        memcpy(in_buffer_.get(), already_data, already_size);
+        stream_.next_in = static_cast<Bytef *>(in_buffer_.get());
+        stream_.avail_in = already_size;
+        stream_.avail_in += ReadOrEOF(file_.get(), static_cast<uint8_t*>(in_buffer_.get()) + already_size, kInputBuffer - already_size);
+      } else {
+        stream_.avail_in = 0;
+      }
+      stream_.zalloc = Z_NULL;
+      stream_.zfree = Z_NULL;
+      stream_.opaque = Z_NULL;
+      stream_.msg = NULL;
+      // 32 for zlib and gzip decoding with automatic header detection.  
+      // 15 for maximum window size.  
+      UTIL_THROW_IF(Z_OK != inflateInit2(&stream_, 32 + 15), GZException, "Failed to initialize zlib.");
+    }
+
+    ~GZip() {
+      if (Z_OK != inflateEnd(&stream_)) {
+        std::cerr << "zlib could not close properly." << std::endl;
+        abort();
+      }
+    }
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      if (amount == 0) return 0;
+      stream_.next_out = static_cast<Bytef*>(to);
+      stream_.avail_out = std::min<std::size_t>(std::numeric_limits<uInt>::max(), amount);
+      do {
+        if (!stream_.avail_in) ReadInput(thunk);
+        int result = inflate(&stream_, 0);
+        switch (result) {
+          case Z_OK:
+            break;
+          case Z_STREAM_END:
+            {
+              std::size_t ret = static_cast<uint8_t*>(stream_.next_out) - static_cast<uint8_t*>(to);
+              ReplaceThis(new Complete(), thunk);
+              return ret;
+            }
+          case Z_ERRNO:
+            UTIL_THROW(ErrnoException, "zlib error");
+          default:
+            UTIL_THROW(GZException, "zlib encountered " << (stream_.msg ? stream_.msg : "an error ") << " code " << result);
+        }
+      } while (stream_.next_out == to);
+      return static_cast<uint8_t*>(stream_.next_out) - static_cast<uint8_t*>(to);
+    }
+
+  private:
+    void ReadInput(ReadCompressed &thunk) {
+      assert(!stream_.avail_in);
+      stream_.next_in = static_cast<Bytef *>(in_buffer_.get());
+      stream_.avail_in = ReadOrEOF(file_.get(), in_buffer_.get(), kInputBuffer);
+      ReadCount(thunk) += stream_.avail_in;
+    }
+
+    scoped_fd file_;
+    scoped_malloc in_buffer_;
+    z_stream stream_;
+};
+#endif // HAVE_ZLIB
+
+#ifdef HAVE_BZLIB
+class BZip : public ReadBase {
+  public:
+    explicit BZip(int fd, void *already_data, std::size_t already_size) {
+      scoped_fd hold(fd);
+      closer_.reset(FDOpenReadOrThrow(hold));
+      int bzerror = BZ_OK;
+      file_ = BZ2_bzReadOpen(&bzerror, closer_.get(), 0, 0, already_data, already_size);
+      switch (bzerror) {
+        case BZ_OK:
+          return;
+        case BZ_CONFIG_ERROR:
+          UTIL_THROW(BZException, "Looks like bzip2 was miscompiled.");
+        case BZ_PARAM_ERROR:
+          UTIL_THROW(BZException, "Parameter error");
+        case BZ_IO_ERROR:
+          UTIL_THROW(BZException, "IO error reading file");
+        case BZ_MEM_ERROR:
+          throw std::bad_alloc();
+      }
+    }
+
+    ~BZip() {
+      int bzerror = BZ_OK;
+      BZ2_bzReadClose(&bzerror, file_);
+      if (bzerror != BZ_OK) {
+        std::cerr << "bz2 readclose error" << std::endl;
+        abort();
+      }
+    }
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      int bzerror = BZ_OK;
+      int ret = BZ2_bzRead(&bzerror, file_, to, std::min<std::size_t>(static_cast<std::size_t>(INT_MAX), amount));
+      long pos;
+      switch (bzerror) {
+        case BZ_STREAM_END:
+          pos = ftell(closer_.get());
+          if (pos != -1) ReadCount(thunk) = pos;
+          ReplaceThis(new Complete(), thunk);
+          return ret;
+        case BZ_OK:
+          pos = ftell(closer_.get());
+          if (pos != -1) ReadCount(thunk) = pos;
+          return ret;
+        default:
+          UTIL_THROW(BZException, "bzip2 error " << BZ2_bzerror(file_, &bzerror) << " code " << bzerror);
+      }
+    }
+
+  private:
+    scoped_FILE closer_;
+    BZFILE *file_;
+};
+#endif // HAVE_BZLIB
+
+#ifdef HAVE_XZLIB
+class XZip : public ReadBase {
+  private:
+    static const std::size_t kInputBuffer = 16384;
+  public:
+    XZip(int fd, void *already_data, std::size_t already_size) 
+      : file_(fd), in_buffer_(malloc(kInputBuffer)), stream_(), action_(LZMA_RUN) {
+      if (!in_buffer_.get()) throw std::bad_alloc();
+      assert(already_size < kInputBuffer);
+      if (already_size) {
+        memcpy(in_buffer_.get(), already_data, already_size);
+        stream_.next_in = static_cast<const uint8_t*>(in_buffer_.get());
+        stream_.avail_in = already_size;
+        stream_.avail_in += ReadOrEOF(file_.get(), static_cast<uint8_t*>(in_buffer_.get()) + already_size, kInputBuffer - already_size);
+      } else {
+        stream_.avail_in = 0;
+      }
+      stream_.allocator = NULL;
+      lzma_ret ret = lzma_stream_decoder(&stream_, UINT64_MAX, LZMA_CONCATENATED);
+      switch (ret) {
+        case LZMA_OK:
+          break;
+        case LZMA_MEM_ERROR:
+          UTIL_THROW(ErrnoException, "xz open error");
+        default:
+          UTIL_THROW(XZException, "xz error code " << ret);
+      }
+    }
+
+    ~XZip() {
+      lzma_end(&stream_);
+    }
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      if (amount == 0) return 0;
+      stream_.next_out = static_cast<uint8_t*>(to);
+      stream_.avail_out = amount;
+      do {
+        if (!stream_.avail_in) ReadInput(thunk);
+        lzma_ret status = lzma_code(&stream_, action_);
+        switch (status) {
+          case LZMA_OK:
+            break;
+          case LZMA_STREAM_END:
+            UTIL_THROW_IF(action_ != LZMA_FINISH, XZException, "Input not finished yet.");
+            {
+              std::size_t ret = static_cast<uint8_t*>(stream_.next_out) - static_cast<uint8_t*>(to);
+              ReplaceThis(new Complete(), thunk);
+              return ret;
+            }
+          case LZMA_MEM_ERROR:
+            throw std::bad_alloc();
+          case LZMA_FORMAT_ERROR:
+            UTIL_THROW(XZException, "xzlib says file format not recognized");
+          case LZMA_OPTIONS_ERROR:
+            UTIL_THROW(XZException, "xzlib says unsupported compression options");
+          case LZMA_DATA_ERROR:
+            UTIL_THROW(XZException, "xzlib says this file is corrupt");
+          case LZMA_BUF_ERROR:
+            UTIL_THROW(XZException, "xzlib says unexpected end of input");
+          default:
+            UTIL_THROW(XZException, "unrecognized xzlib error " << status);
+        }
+      } while (stream_.next_out == to);
+      return static_cast<uint8_t*>(stream_.next_out) - static_cast<uint8_t*>(to);
+    }
+
+  private:
+    void ReadInput(ReadCompressed &thunk) {
+      assert(!stream_.avail_in);
+      stream_.next_in = static_cast<const uint8_t*>(in_buffer_.get());
+      stream_.avail_in = ReadOrEOF(file_.get(), in_buffer_.get(), kInputBuffer);
+      if (!stream_.avail_in) action_ = LZMA_FINISH;
+      ReadCount(thunk) += stream_.avail_in;
+    }
+
+    scoped_fd file_;
+    scoped_malloc in_buffer_;
+    lzma_stream stream_;
+
+    lzma_action action_;
+};
+#endif // HAVE_XZLIB
+
+class IStreamReader : public ReadBase {
+  public:
+    explicit IStreamReader(std::istream &stream) : stream_(stream) {}
+
+    std::size_t Read(void *to, std::size_t amount, ReadCompressed &thunk) {
+      if (!stream_.read(static_cast<char*>(to), amount)) {
+        UTIL_THROW_IF(!stream_.eof(), ErrnoException, "istream error");
+        amount = stream_.gcount();
+      }
+      ReadCount(thunk) += amount;
+      return amount;
+    }
+
+  private:
+    std::istream &stream_;
+};
+
+enum MagicResult {
+  UNKNOWN, GZIP, BZIP, XZIP
+};
+
+MagicResult DetectMagic(const void *from_void) {
+  const uint8_t *header = static_cast<const uint8_t*>(from_void);
+  if (header[0] == 0x1f && header[1] == 0x8b) {
+    return GZIP;
+  }
+  if (header[0] == 'B' && header[1] == 'Z' && header[2] == 'h') {
+    return BZIP;
+  }
+  const uint8_t xzmagic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+  if (!memcmp(header, xzmagic, 6)) {
+    return XZIP;
+  }
+  return UNKNOWN;
+}
+
+ReadBase *ReadFactory(int fd, uint64_t &raw_amount) {
+  scoped_fd hold(fd);
+  unsigned char header[ReadCompressed::kMagicSize];
+  raw_amount = ReadOrEOF(fd, header, ReadCompressed::kMagicSize);
+  if (!raw_amount)
+    return new Uncompressed(hold.release());
+  if (raw_amount != ReadCompressed::kMagicSize)
+    return new UncompressedWithHeader(hold.release(), header, raw_amount);
+  switch (DetectMagic(header)) {
+    case GZIP:
+#ifdef HAVE_ZLIB
+      return new GZip(hold.release(), header, ReadCompressed::kMagicSize);
+#else
+      UTIL_THROW(CompressedException, "This looks like a gzip file but gzip support was not compiled in.");
+#endif
+    case BZIP:
+#ifdef HAVE_BZLIB
+      return new BZip(hold.release(), header, ReadCompressed::kMagicSize);
+#else
+      UTIL_THROW(CompressedException, "This looks like a bzip file (it begins with BZ), but bzip support was not compiled in.");
+#endif
+    case XZIP:
+#ifdef HAVE_XZLIB
+      return new XZip(hold.release(), header, ReadCompressed::kMagicSize);
+#else
+      UTIL_THROW(CompressedException, "This looks like an xz file, but xz support was not compiled in.");
+#endif
+    case UNKNOWN:
+      break;
+  }
+  try {
+    SeekOrThrow(fd, 0);
+  } catch (const util::ErrnoException &e) {
+    return new UncompressedWithHeader(hold.release(), header, ReadCompressed::kMagicSize);
+  }
+  return new Uncompressed(hold.release());
+}
+
+} // namespace
+
+bool ReadCompressed::DetectCompressedMagic(const void *from_void) {
+  return DetectMagic(from_void) != UNKNOWN;
+}
+
+ReadCompressed::ReadCompressed(int fd) {
+  Reset(fd);
+}
+
+ReadCompressed::ReadCompressed(std::istream &in) {
+  Reset(in);
+}
+
+ReadCompressed::ReadCompressed() {}
+
+ReadCompressed::~ReadCompressed() {}
+
+void ReadCompressed::Reset(int fd) {
+  internal_.reset();
+  internal_.reset(ReadFactory(fd, raw_amount_));
+}
+
+void ReadCompressed::Reset(std::istream &in) {
+  internal_.reset();
+  internal_.reset(new IStreamReader(in));
+}
+
+std::size_t ReadCompressed::Read(void *to, std::size_t amount) {
+  return internal_->Read(to, amount, *this);
+}
+
+} // namespace util
diff --git a/klm/util/read_compressed.hh b/klm/util/read_compressed.hh
new file mode 100644
index 00000000..8b54c9e8
--- /dev/null
+++ b/klm/util/read_compressed.hh
@@ -0,0 +1,81 @@
+#ifndef UTIL_READ_COMPRESSED__
+#define UTIL_READ_COMPRESSED__
+
+#include "util/exception.hh"
+#include "util/scoped.hh"
+
+#include <cstddef>
+
+#include <stdint.h>
+
+namespace util {
+
+class CompressedException : public Exception {
+  public:
+    CompressedException() throw();
+    virtual ~CompressedException() throw();
+};
+
+class GZException : public CompressedException {
+  public:
+    GZException() throw();
+    ~GZException() throw();
+};
+
+class BZException : public CompressedException {
+  public:
+    BZException() throw();
+    ~BZException() throw();
+};
+
+class XZException : public CompressedException {
+  public:
+    XZException() throw();
+    ~XZException() throw();
+};
+
+class ReadBase;
+
+class ReadCompressed {
+  public:
+    static const std::size_t kMagicSize = 6;
+    // Must have at least kMagicSize bytes.  
+    static bool DetectCompressedMagic(const void *from);
+
+    // Takes ownership of fd.   
+    explicit ReadCompressed(int fd);
+
+    // Try to avoid using this.  Use the fd instead.
+    // There is no decompression support for istreams.
+    explicit ReadCompressed(std::istream &in);
+
+    // Must call Reset later.
+    ReadCompressed();
+
+    ~ReadCompressed();
+
+    // Takes ownership of fd.  
+    void Reset(int fd);
+
+    // Same advice as the constructor.
+    void Reset(std::istream &in);
+
+    std::size_t Read(void *to, std::size_t amount);
+
+    uint64_t RawAmount() const { return raw_amount_; }
+
+  private:
+    friend class ReadBase;
+
+    scoped_ptr<ReadBase> internal_;
+
+    uint64_t raw_amount_;
+
+    // No copying.  
+    ReadCompressed(const ReadCompressed &);
+    void operator=(const ReadCompressed &);
+};
+
+} // namespace util
+
+#endif // UTIL_READ_COMPRESSED__
diff --git a/klm/util/read_compressed_test.cc b/klm/util/read_compressed_test.cc
new file mode 100644
index 00000000..9cb4a4b9
--- /dev/null
+++ b/klm/util/read_compressed_test.cc
@@ -0,0 +1,109 @@
+#include "util/read_compressed.hh"
+
+#include "util/file.hh"
+#include "util/have.hh"
+
+#define BOOST_TEST_MODULE ReadCompressedTest
+#include <boost/test/unit_test.hpp>
+#include <boost/scoped_ptr.hpp>
+
+#include <fstream>
+#include <string>
+
+#include <stdlib.h>
+
+namespace util {
+namespace {
+
+void ReadLoop(ReadCompressed &reader, void *to_void, std::size_t amount) {
+  uint8_t *to = static_cast<uint8_t*>(to_void);
+  while (amount) {
+    std::size_t ret = reader.Read(to, amount);
+    BOOST_REQUIRE(ret);
+    to += ret;
+    amount -= ret;
+  }
+}
+
+const uint32_t kSize4 = 100000 / 4;
+
+std::string WriteRandom() {
+  char name[] = "tempXXXXXX";
+  scoped_fd original(mkstemp(name));
+  BOOST_REQUIRE(original.get() > 0);
+  for (uint32_t i = 0; i < kSize4; ++i) {
+    WriteOrThrow(original.get(), &i, sizeof(uint32_t));
+  }
+  return name;
+}
+
+void VerifyRead(ReadCompressed &reader) {
+  for (uint32_t i = 0; i < kSize4; ++i) {
+    uint32_t got;
+    ReadLoop(reader, &got, sizeof(uint32_t));
+    BOOST_CHECK_EQUAL(i, got);
+  }
+
+  char ignored;
+  BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1));
+  // Test double EOF call.
+  BOOST_CHECK_EQUAL((std::size_t)0, reader.Read(&ignored, 1));
+}
+
+void TestRandom(const char *compressor) {
+  std::string name(WriteRandom());
+
+  char gzname[] = "tempXXXXXX";
+  scoped_fd gzipped(mkstemp(gzname));
+
+  std::string command(compressor);
+#ifdef __CYGWIN__
+  command += ".exe";
+#endif
+  command += " <\"";
+  command += name;
+  command += "\" >\"";
+  command += gzname;
+  command += "\"";
+  BOOST_REQUIRE_EQUAL(0, system(command.c_str()));
+
+  BOOST_CHECK_EQUAL(0, unlink(name.c_str()));
+  BOOST_CHECK_EQUAL(0, unlink(gzname));
+
+  ReadCompressed reader(gzipped.release());
+  VerifyRead(reader);
+}
+
+BOOST_AUTO_TEST_CASE(Uncompressed) {
+  TestRandom("cat");
+}
+
+#ifdef HAVE_ZLIB
+BOOST_AUTO_TEST_CASE(ReadGZ) {
+  TestRandom("gzip");
+}
+#endif // HAVE_ZLIB
+
+#ifdef HAVE_BZLIB
+BOOST_AUTO_TEST_CASE(ReadBZ) {
+  TestRandom("bzip2");
+}
+#endif // HAVE_BZLIB
+
+#ifdef HAVE_XZLIB
+BOOST_AUTO_TEST_CASE(ReadXZ) {
+  TestRandom("xz");
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(IStream) {
+  std::string name(WriteRandom());
+  std::fstream stream(name.c_str(), std::ios::in);
+  BOOST_CHECK_EQUAL(0, unlink(name.c_str()));
+  ReadCompressed reader;
+  reader.Reset(stream);
+  VerifyRead(reader);
+}
+
+} // namespace
+} // namespace util
diff --git a/klm/util/scoped.cc b/klm/util/scoped.cc
new file mode 100644
index 00000000..e7066ee4
--- /dev/null
+++ b/klm/util/scoped.cc
@@ -0,0 +1,29 @@
+#include "util/scoped.hh"
+
+#include <cstdlib>
+
+namespace util {
+
+MallocException::MallocException(std::size_t requested) throw() {
+  *this << "for " << requested << " bytes ";
+}
+
+MallocException::~MallocException() throw() {}
+
+void *MallocOrThrow(std::size_t requested) {
+  void *ret;
+  UTIL_THROW_IF_ARG(!(ret = std::malloc(requested)), MallocException, (requested), "in malloc");
+  return ret;
+}
+
+scoped_malloc::~scoped_malloc() {
+  std::free(p_);
+}
+
+void scoped_malloc::call_realloc(std::size_t to) {
+  void *ret;
+  UTIL_THROW_IF_ARG(!(ret = std::realloc(p_, to)) && to, MallocException, (to), "in realloc");
+  p_ = ret;
+}
+
+} // namespace util
diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh
index 93e2e817..d0a5aabd 100644
--- a/klm/util/scoped.hh
+++ b/klm/util/scoped.hh
@@ -1,58 +1,34 @@
 #ifndef UTIL_SCOPED__
 #define UTIL_SCOPED__
+/* Other scoped objects in the style of scoped_ptr. */
 
 #include "util/exception.hh"
-
-/* Other scoped objects in the style of scoped_ptr. */
 #include <cstddef>
-#include <cstdlib>
 
 namespace util {
 
-template <class T, class R, R (*Free)(T*)> class scoped_thing {
+class MallocException : public ErrnoException {
   public:
-    explicit scoped_thing(T *c = static_cast<T*>(0)) : c_(c) {}
-
-    ~scoped_thing() { if (c_) Free(c_); }
-
-    void reset(T *c) {
-      if (c_) Free(c_);
-      c_ = c;
-    }
-
-    T &operator*() { return *c_; }
-    const T&operator*() const { return *c_; }
-    T &operator->() { return *c_; }
-    const T&operator->() const { return *c_; }
-
-    T *get() { return c_; }
-    const T *get() const { return c_; }
-
-  private:
-    T *c_;
-
-    scoped_thing(const scoped_thing &);
-    scoped_thing &operator=(const scoped_thing &);
+    explicit MallocException(std::size_t requested) throw();
+    ~MallocException() throw();
 };
 
+void *MallocOrThrow(std::size_t requested);
+
 class scoped_malloc {
   public:
     scoped_malloc() : p_(NULL) {}
 
     scoped_malloc(void *p) : p_(p) {}
 
-    ~scoped_malloc() { std::free(p_); }
+    ~scoped_malloc();
 
     void reset(void *p = NULL) {
       scoped_malloc other(p_);
       p_ = p;
     }
 
-    void call_realloc(std::size_t to) {
-      void *ret;
-      UTIL_THROW_IF(!(ret = std::realloc(p_, to)) && to, util::ErrnoException, "realloc to " << to << " bytes failed.");
-      p_ = ret;
-    }
+    void call_realloc(std::size_t to);
 
     void *get() { return p_; }
     const void *get() const { return p_; }
@@ -77,9 +53,6 @@ template <class T> class scoped_array {
     T &operator*() { return *c_; }
     const T&operator*() const { return *c_; }
 
-    T &operator->() { return *c_; }
-    const T&operator->() const { return *c_; }
-
     T &operator[](std::size_t idx) { return c_[idx]; }
     const T &operator[](std::size_t idx) const { return c_[idx]; }
 
@@ -90,6 +63,39 @@ template <class T> class scoped_array {
 
   private:
     T *c_;
+
+    scoped_array(const scoped_array &);
+    void operator=(const scoped_array &);
+};
+
+template <class T> class scoped_ptr {
+  public:
+    explicit scoped_ptr(T *content = NULL) : c_(content) {}
+
+    ~scoped_ptr() { delete c_; }
+
+    T *get() { return c_; }
+    const T* get() const { return c_; }
+
+    T &operator*() { return *c_; }
+    const T&operator*() const { return *c_; }
+
+    T *operator->() { return c_; }
+    const T*operator->() const { return c_; }
+
+    T &operator[](std::size_t idx) { return c_[idx]; }
+    const T &operator[](std::size_t idx) const { return c_[idx]; }
+
+    void reset(T *to = NULL) {
+      scoped_ptr<T> other(c_);
+      c_ = to;
+    }
+
+  private:
+    T *c_;
+
+    scoped_ptr(const scoped_ptr &);
+    void operator=(const scoped_ptr &);
 };
 
 } // namespace util
diff --git a/klm/util/stream/Makefile.am b/klm/util/stream/Makefile.am
new file mode 100644
index 00000000..f18cbedb
--- /dev/null
+++ b/klm/util/stream/Makefile.am
@@ -0,0 +1,20 @@
+noinst_LIBRARIES = libklm_util_stream.a
+
+libklm_util_stream_a_SOURCES = \
+  block.hh \
+  chain.cc \
+  chain.hh \
+  config.hh \
+  io.cc \
+  io.hh \
+  line_input.cc \
+  line_input.hh \
+  multi_progress.cc \
+  multi_progress.hh \
+  sort.hh \
+  stream.hh \
+  timer.hh
+
+AM_CPPFLAGS = -W -Wall -I$(top_srcdir)/klm
+
+#-I$(top_srcdir)/klm/util/double-conversion
diff --git a/klm/util/stream/block.hh b/klm/util/stream/block.hh
new file mode 100644
index 00000000..11aa991e
--- /dev/null
+++ b/klm/util/stream/block.hh
@@ -0,0 +1,43 @@
+#ifndef UTIL_STREAM_BLOCK__
+#define UTIL_STREAM_BLOCK__
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+class Block {
+  public:
+    Block() : mem_(NULL), valid_size_(0) {}
+
+    Block(void *mem, std::size_t size) : mem_(mem), valid_size_(size) {}
+
+    void SetValidSize(std::size_t to) { valid_size_ = to; }
+    // Read might fill in less than Allocated at EOF.   
+    std::size_t ValidSize() const { return valid_size_; }
+
+    void *Get() { return mem_; }
+    const void *Get() const { return mem_; }
+
+    const void *ValidEnd() const { 
+      return reinterpret_cast<const uint8_t*>(mem_) + valid_size_;
+    }
+
+    operator bool() const { return mem_ != NULL; }
+    bool operator!() const { return mem_ == NULL; }
+ 
+  private:
+    friend class Link;
+    void SetToPoison() {
+      mem_ = NULL;
+    }
+
+    void *mem_;
+    std::size_t valid_size_;
+};
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_BLOCK__
diff --git a/klm/util/stream/chain.cc b/klm/util/stream/chain.cc
new file mode 100644
index 00000000..46708c60
--- /dev/null
+++ b/klm/util/stream/chain.cc
@@ -0,0 +1,155 @@
+#include "util/stream/chain.hh"
+
+#include "util/stream/io.hh"
+
+#include "util/exception.hh"
+#include "util/pcqueue.hh"
+
+#include <cstdlib>
+#include <new>
+#include <iostream>
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace util {
+namespace stream {
+
+ChainConfigException::ChainConfigException() throw() { *this << "Chain configured with "; }
+ChainConfigException::~ChainConfigException() throw() {}
+
+Thread::~Thread() {
+  thread_.join();
+}
+
+void Thread::UnhandledException(const std::exception &e) {
+  std::cerr << e.what() << std::endl;
+  abort();
+}
+
+void Recycler::Run(const ChainPosition &position) {
+  for (Link l(position); l; ++l) {
+    l->SetValidSize(position.GetChain().BlockSize());
+  }
+}
+
+const Recycler kRecycle = Recycler();
+
+Chain::Chain(const ChainConfig &config) : config_(config), complete_called_(false) {
+  UTIL_THROW_IF(!config.entry_size, ChainConfigException, "zero-size entries.");
+  UTIL_THROW_IF(!config.block_count, ChainConfigException, "block count zero");
+  UTIL_THROW_IF(config.total_memory < config.entry_size * config.block_count, ChainConfigException, config.total_memory << " total memory, too small for " << config.block_count << " blocks of containing entries of size " << config.entry_size);
+  // Round down block size to a multiple of entry size.   
+  block_size_ = config.total_memory / (config.block_count * config.entry_size) * config.entry_size;
+}
+
+Chain::~Chain() {
+  Wait();
+}
+
+ChainPosition Chain::Add() {
+  if (!Running()) Start();
+  PCQueue<Block> &in = queues_.back();
+  queues_.push_back(new PCQueue<Block>(config_.block_count));
+  return ChainPosition(in, queues_.back(), this, progress_);
+}
+
+Chain &Chain::operator>>(const WriteAndRecycle &writer) {
+  threads_.push_back(new Thread(Complete(), writer));
+  return *this;
+}
+
+void Chain::Wait(bool release_memory) {
+  if (queues_.empty()) {
+    assert(threads_.empty());
+    return; // Nothing to wait for.  
+  }
+  if (!complete_called_) CompleteLoop();
+  threads_.clear();
+  for (std::size_t i = 0; queues_.front().Consume(); ++i) {
+    if (i == config_.block_count) {
+      std::cerr << "Chain ending without poison." << std::endl;
+      abort();
+    }
+  }
+  queues_.clear();
+  progress_.Finished();
+  complete_called_ = false;
+  if (release_memory) memory_.reset();
+}
+
+void Chain::Start() {
+  Wait(false);
+  if (!memory_.get()) {
+    // Allocate memory.  
+    assert(threads_.empty());
+    assert(queues_.empty());
+    std::size_t malloc_size = block_size_ * config_.block_count;
+    memory_.reset(MallocOrThrow(malloc_size));
+  }
+  // This queue can accomodate all blocks.    
+  queues_.push_back(new PCQueue<Block>(config_.block_count));
+  // Populate the lead queue with blocks.  
+  uint8_t *base = static_cast<uint8_t*>(memory_.get());
+  for (std::size_t i = 0; i < config_.block_count; ++i) {
+    queues_.front().Produce(Block(base, block_size_));
+    base += block_size_;
+  }
+}
+
+ChainPosition Chain::Complete() {
+  assert(Running());
+  UTIL_THROW_IF(complete_called_, util::Exception, "CompleteLoop() called twice");
+  complete_called_ = true;
+  return ChainPosition(queues_.back(), queues_.front(), this, progress_);
+}
+
+Link::Link() : in_(NULL), out_(NULL), poisoned_(true) {}
+
+void Link::Init(const ChainPosition &position) {
+  UTIL_THROW_IF(in_, util::Exception, "Link::Init twice");
+  in_ = position.in_;
+  out_ = position.out_;
+  poisoned_ = false;
+  progress_ = position.progress_;
+  in_->Consume(current_);
+}
+
+Link::Link(const ChainPosition &position) : in_(NULL) {
+  Init(position);
+}
+
+Link::~Link() {
+  if (current_) {
+    // Probably an exception unwinding.  
+    std::cerr << "Last input should have been poison." << std::endl;
+//    abort();
+  } else {
+    if (!poisoned_) {
+      // Pass the poison!
+      out_->Produce(current_);
+    }
+  }
+}
+
+Link &Link::operator++() {
+  assert(current_);
+  progress_ += current_.ValidSize();
+  out_->Produce(current_);
+  in_->Consume(current_);
+  if (!current_) {
+    poisoned_ = true;
+    out_->Produce(current_);
+  }
+  return *this;
+}
+
+void Link::Poison() {
+  assert(!poisoned_);
+  current_.SetToPoison();
+  out_->Produce(current_);
+  poisoned_ = true;
+}
+
+} // namespace stream
+} // namespace util
diff --git a/klm/util/stream/chain.hh b/klm/util/stream/chain.hh
new file mode 100644
index 00000000..154b9b33
--- /dev/null
+++ b/klm/util/stream/chain.hh
@@ -0,0 +1,198 @@
+#ifndef UTIL_STREAM_CHAIN__
+#define UTIL_STREAM_CHAIN__
+
+#include "util/stream/block.hh"
+#include "util/stream/config.hh"
+#include "util/stream/multi_progress.hh"
+#include "util/scoped.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/thread/thread.hpp>
+
+#include <cstddef>
+
+#include <assert.h>
+
+namespace util {
+template <class T> class PCQueue;
+namespace stream {
+
+class ChainConfigException : public Exception {
+  public:
+    ChainConfigException() throw();
+    ~ChainConfigException() throw();
+};
+
+class Chain;
+// Specifies position in chain for Link constructor.
+class ChainPosition {
+  public:
+    const Chain &GetChain() const { return *chain_; }
+  private:
+    friend class Chain;
+    friend class Link;
+    ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress) 
+      : in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {}
+
+    PCQueue<Block> *in_, *out_;
+
+    Chain *chain_;
+
+    WorkerProgress progress_;
+};
+
+// Position is usually ChainPosition but if there are multiple streams involved, this can be ChainPositions.  
+class Thread {
+  public:
+    template <class Position, class Worker> Thread(const Position &position, const Worker &worker)
+      : thread_(boost::ref(*this), position, worker) {}
+
+    ~Thread();
+
+    template <class Position, class Worker> void operator()(const Position &position, Worker &worker) {
+      try {
+        worker.Run(position);
+      } catch (const std::exception &e) {
+        UnhandledException(e);
+      }
+    }
+
+  private:
+    void UnhandledException(const std::exception &e);
+
+    boost::thread thread_;
+};
+
+class Recycler {
+  public:
+    void Run(const ChainPosition &position);
+};
+
+extern const Recycler kRecycle;
+class WriteAndRecycle;
+
+class Chain {
+  private:
+    template <class T, void (T::*ptr)(const ChainPosition &) = &T::Run> struct CheckForRun {
+      typedef Chain type;
+    };
+
+  public:
+    explicit Chain(const ChainConfig &config);
+
+    ~Chain();
+
+    void ActivateProgress() {
+      assert(!Running());
+      progress_.Activate();
+    }
+
+    void SetProgressTarget(uint64_t target) {
+      progress_.SetTarget(target);
+    }
+
+    std::size_t EntrySize() const {
+      return config_.entry_size;
+    }
+    std::size_t BlockSize() const {
+      return block_size_;
+    }
+
+    // Two ways to add to the chain: Add() or operator>>.  
+    ChainPosition Add();
+
+    // This is for adding threaded workers with a Run method.  
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
+      assert(!complete_called_);
+      threads_.push_back(new Thread(Add(), worker));
+      return *this;
+    }
+
+    // Avoid copying the worker.  
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
+      assert(!complete_called_);
+      threads_.push_back(new Thread(Add(), worker));
+      return *this;
+    }
+
+    // Note that Link and Stream also define operator>> outside this class.  
+
+    // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.  
+    void CompleteLoop() {
+      threads_.push_back(new Thread(Complete(), kRecycle));
+    }
+
+    Chain &operator>>(const Recycler &recycle) {
+      CompleteLoop();
+      return *this;
+    }
+
+    Chain &operator>>(const WriteAndRecycle &writer);
+
+    // Chains are reusable.  Call Wait to wait for everything to finish and free memory.  
+    void Wait(bool release_memory = true);
+
+    // Waits for the current chain to complete (if any) then starts again.  
+    void Start();
+
+    bool Running() const { return !queues_.empty(); }
+
+  private:
+    ChainPosition Complete();
+
+    ChainConfig config_;
+
+    std::size_t block_size_;
+
+    scoped_malloc memory_;
+
+    boost::ptr_vector<PCQueue<Block> > queues_;
+
+    bool complete_called_;
+
+    boost::ptr_vector<Thread> threads_;
+
+    MultiProgress progress_;
+};
+
+// Create the link in the worker thread using the position token.
+class Link {
+  public:
+    // Either default construct and Init or just construct all at once.
+    Link();
+    void Init(const ChainPosition &position);
+
+    explicit Link(const ChainPosition &position);
+
+    ~Link();
+
+    Block &operator*() { return current_; }
+    const Block &operator*() const { return current_; }
+
+    Block *operator->() { return &current_; }
+    const Block *operator->() const { return &current_; }
+
+    Link &operator++();
+
+    operator bool() const { return current_; }
+
+    void Poison();
+
+  private:
+    Block current_;
+    PCQueue<Block> *in_, *out_;
+ 
+    bool poisoned_;
+
+    WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, Link &link) {
+  link.Init(chain.Add());
+  return chain;
+}
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_CHAIN__
diff --git a/klm/util/stream/config.hh b/klm/util/stream/config.hh
new file mode 100644
index 00000000..1eeb3a8a
--- /dev/null
+++ b/klm/util/stream/config.hh
@@ -0,0 +1,32 @@
+#ifndef UTIL_STREAM_CONFIG__
+#define UTIL_STREAM_CONFIG__
+
+#include <cstddef>
+#include <string>
+
+namespace util { namespace stream {
+
+struct ChainConfig {
+  ChainConfig() {}
+
+  ChainConfig(std::size_t in_entry_size, std::size_t in_block_count, std::size_t in_total_memory)
+    : entry_size(in_entry_size), block_count(in_block_count), total_memory(in_total_memory) {}
+
+  std::size_t entry_size;
+  std::size_t block_count;
+  // Chain's constructor will make this a multiple of entry_size. 
+  std::size_t total_memory;
+};
+
+struct SortConfig {
+  std::string temp_prefix;
+
+  // Size of each input/output buffer.
+  std::size_t buffer_size;
+
+  // Total memory to use when running alone.
+  std::size_t total_memory;
+};
+
+}} // namespaces
+#endif // UTIL_STREAM_CONFIG__
diff --git a/klm/util/stream/io.cc b/klm/util/stream/io.cc
new file mode 100644
index 00000000..0459f706
--- /dev/null
+++ b/klm/util/stream/io.cc
@@ -0,0 +1,66 @@
+#include "util/stream/io.hh"
+
+#include "util/file.hh"
+#include "util/stream/chain.hh"
+
+#include <cstddef>
+
+namespace util {
+namespace stream {
+
+ReadSizeException::ReadSizeException() throw() {}
+ReadSizeException::~ReadSizeException() throw() {}
+
+void Read::Run(const ChainPosition &position) {
+  const std::size_t block_size = position.GetChain().BlockSize();
+  const std::size_t entry_size = position.GetChain().EntrySize();
+  for (Link link(position); link; ++link) {
+    std::size_t got = util::ReadOrEOF(file_, link->Get(), block_size);
+    UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << "."); 
+    if (got == 0) {
+      link.Poison();
+      return;
+    } else {
+      link->SetValidSize(got);
+    }
+  }
+}
+
+void PRead::Run(const ChainPosition &position) {
+  scoped_fd owner;
+  if (own_) owner.reset(file_);
+  const uint64_t size = SizeOrThrow(file_);
+  UTIL_THROW_IF(size % static_cast<uint64_t>(position.GetChain().EntrySize()), ReadSizeException, "File size " << file_ << " size is " << size << " not a multiple of " << position.GetChain().EntrySize());
+  const std::size_t block_size = position.GetChain().BlockSize();
+  const uint64_t block_size64 = static_cast<uint64_t>(block_size);
+  Link link(position);
+  uint64_t offset = 0;
+  for (; offset + block_size64 < size; offset += block_size64, ++link) {
+    PReadOrThrow(file_, link->Get(), block_size, offset);
+    link->SetValidSize(block_size);
+  }
+  // size - offset is <= block_size, so it casts to 32-bit fine.
+  if (size - offset) {
+    PReadOrThrow(file_, link->Get(), size - offset, offset);
+    link->SetValidSize(size - offset);
+    ++link;
+  }
+  link.Poison();
+}
+
+void Write::Run(const ChainPosition &position) {
+  for (Link link(position); link; ++link) {
+    WriteOrThrow(file_, link->Get(), link->ValidSize());
+  }
+}
+
+void WriteAndRecycle::Run(const ChainPosition &position) {
+  const std::size_t block_size = position.GetChain().BlockSize();
+  for (Link link(position); link; ++link) {
+    WriteOrThrow(file_, link->Get(), link->ValidSize());
+    link->SetValidSize(block_size);
+  }
+}
+
+} // namespace stream
+} // namespace util
diff --git a/klm/util/stream/io.hh b/klm/util/stream/io.hh
new file mode 100644
index 00000000..934b6b3f
--- /dev/null
+++ b/klm/util/stream/io.hh
@@ -0,0 +1,76 @@
+#ifndef UTIL_STREAM_IO__
+#define UTIL_STREAM_IO__
+
+#include "util/exception.hh"
+#include "util/file.hh"
+
+namespace util {
+namespace stream {
+
+class ChainPosition;
+
+class ReadSizeException : public util::Exception {
+  public:
+    ReadSizeException() throw();
+    ~ReadSizeException() throw();
+};
+
+class Read {
+  public:
+    explicit Read(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position); 
+  private:
+    int file_;
+};
+
+// Like read but uses pread so that the file can be accessed from multiple threads.  
+class PRead {
+  public:
+    explicit PRead(int fd, bool take_own = false) : file_(fd), own_(take_own) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+    bool own_;
+};
+
+class Write {
+  public:
+    explicit Write(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+};
+
+class WriteAndRecycle {
+  public:
+    explicit WriteAndRecycle(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+};
+
+// Reuse the same file over and over again to buffer output.  
+class FileBuffer {
+  public:
+    explicit FileBuffer(int fd) : file_(fd) {}
+
+    WriteAndRecycle Sink() const {
+      util::SeekOrThrow(file_.get(), 0);
+      return WriteAndRecycle(file_.get());
+    }
+
+    PRead Source() const {
+      return PRead(file_.get());
+    }
+
+    uint64_t Size() const {
+      return SizeOrThrow(file_.get());
+    }
+
+  private:
+    scoped_fd file_;
+};
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_IO__
diff --git a/klm/util/stream/io_test.cc b/klm/util/stream/io_test.cc
new file mode 100644
index 00000000..82108335
--- /dev/null
+++ b/klm/util/stream/io_test.cc
@@ -0,0 +1,38 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/chain.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE IOTest
+#include <boost/test/unit_test.hpp>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+BOOST_AUTO_TEST_CASE(CopyFile) {
+  std::string temps("io_test_temp");
+
+  scoped_fd in(MakeTemp(temps));
+  for (uint64_t i = 0; i < 100000; ++i) {
+    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+  }
+  SeekOrThrow(in.get(), 0);
+  scoped_fd out(MakeTemp(temps));
+
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 1024;
+  config.block_count = 10;
+
+  Chain(config) >> PRead(in.get()) >> Write(out.get());
+
+  SeekOrThrow(out.get(), 0);
+  for (uint64_t i = 0; i < 100000; ++i) {
+    uint64_t got;
+    ReadOrThrow(out.get(), &got, sizeof(uint64_t));
+    BOOST_CHECK_EQUAL(i, got);
+  }
+}
+
+}}} // namespaces
diff --git a/klm/util/stream/line_input.cc b/klm/util/stream/line_input.cc
new file mode 100644
index 00000000..dafa5020
--- /dev/null
+++ b/klm/util/stream/line_input.cc
@@ -0,0 +1,52 @@
+#include "util/stream/line_input.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+#include "util/stream/chain.hh"
+
+#include <algorithm>
+#include <vector>
+
+namespace util { namespace stream {
+
+void LineInput::Run(const ChainPosition &position) {
+  ReadCompressed reader(fd_);
+  // Holding area for beginning of line to be placed in next block.
+  std::vector<char> carry;
+  
+  for (Link block(position); ; ++block) {
+    char *to = static_cast<char*>(block->Get());
+    char *begin = to;
+    char *end = to + position.GetChain().BlockSize();
+    std::copy(carry.begin(), carry.end(), to);
+    to += carry.size();
+    while (to != end) {
+      std::size_t got = reader.Read(to, end - to);
+      if (!got) {
+        // EOF
+        block->SetValidSize(to - begin);
+        ++block;
+        block.Poison();
+        return;
+      }
+      to += got;
+    }
+
+    // Find the last newline.
+    char *newline;
+    for (newline = to - 1; ; --newline) {
+      UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ".  Is this a text file?");
+      if (*newline == '\n') break;
+    }
+    
+    // Copy everything after the last newline to the carry.
+    carry.clear();
+    carry.resize(to - (newline + 1));
+    std::copy(newline + 1, to, &*carry.begin());
+
+    block->SetValidSize(newline + 1 - begin);
+  }
+}
+
+}} // namespaces
diff --git a/klm/util/stream/line_input.hh b/klm/util/stream/line_input.hh
new file mode 100644
index 00000000..86db1dd0
--- /dev/null
+++ b/klm/util/stream/line_input.hh
@@ -0,0 +1,22 @@
+#ifndef UTIL_STREAM_LINE_INPUT__
+#define UTIL_STREAM_LINE_INPUT__
+namespace util {namespace stream {
+
+class ChainPosition;
+
+/* Worker that reads input into blocks, ensuring that blocks contain whole
+ * lines.  Assumes that the maximum size of a line is less than the block size
+ */
+class LineInput {
+  public:
+    // Takes ownership upon thread execution.
+    explicit LineInput(int fd);
+
+    void Run(const ChainPosition &position);
+
+  private:
+    int fd_;
+};
+
+}} // namespaces
+#endif // UTIL_STREAM_LINE_INPUT__
diff --git a/klm/util/stream/multi_progress.cc b/klm/util/stream/multi_progress.cc
new file mode 100644
index 00000000..8ba10386
--- /dev/null
+++ b/klm/util/stream/multi_progress.cc
@@ -0,0 +1,86 @@
+#include "util/stream/multi_progress.hh"
+
+// TODO: merge some functionality with the simple progress bar?
+#include "util/ersatz_progress.hh"
+
+#include <iostream>
+#include <limits>
+
+#include <string.h>
+
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <unistd.h>
+#endif
+
+namespace util { namespace stream {
+
+namespace {
+const char kDisplayCharacters[] = "-+*#0123456789";
+
+uint64_t Next(unsigned char stone, uint64_t complete) {
+  return (static_cast<uint64_t>(stone + 1) * complete + MultiProgress::kWidth - 1) / MultiProgress::kWidth;
+}
+
+} // namespace
+
+MultiProgress::MultiProgress() : active_(false), complete_(std::numeric_limits<uint64_t>::max()), character_handout_(0) {}
+
+MultiProgress::~MultiProgress() {
+  if (active_ && complete_ != std::numeric_limits<uint64_t>::max())
+    std::cerr << '\n';
+}
+
+void MultiProgress::Activate() {
+  active_ = 
+#if !defined(_WIN32) && !defined(_WIN64)
+    // Is stderr a terminal?  
+    (isatty(2) == 1)
+#else
+    true
+#endif
+    ;
+}
+
+void MultiProgress::SetTarget(uint64_t complete) {
+  if (!active_) return;
+  complete_ = complete;
+  if (!complete) complete_ = 1;
+  memset(display_, 0, sizeof(display_));
+  character_handout_ = 0;
+  std::cerr << kProgressBanner;
+}
+
+WorkerProgress MultiProgress::Add() {
+  if (!active_)
+    return WorkerProgress(std::numeric_limits<uint64_t>::max(), *this, '\0');
+  std::size_t character_index;
+  {
+    boost::unique_lock<boost::mutex> lock(mutex_);
+    character_index = character_handout_++;
+    if (character_handout_ == sizeof(kDisplayCharacters) - 1)
+      character_handout_ = 0;
+  }
+  return WorkerProgress(Next(0, complete_), *this, kDisplayCharacters[character_index]);
+}
+
+void MultiProgress::Finished() {
+  if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+  std::cerr << '\n';
+  complete_ = std::numeric_limits<uint64_t>::max();
+}
+
+void MultiProgress::Milestone(WorkerProgress &worker) {
+  if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+  unsigned char stone = std::min(static_cast<uint64_t>(kWidth), worker.current_ * kWidth / complete_);
+  for (char *i = &display_[worker.stone_]; i < &display_[stone]; ++i) {
+    *i = worker.character_;
+  }
+  worker.next_ = Next(stone, complete_);
+  worker.stone_ = stone;
+  {
+    boost::unique_lock<boost::mutex> lock(mutex_);
+    std::cerr << '\r' << display_ << std::flush;
+  }
+}
+
+}} // namespaces
diff --git a/klm/util/stream/multi_progress.hh b/klm/util/stream/multi_progress.hh
new file mode 100644
index 00000000..c4dd45a9
--- /dev/null
+++ b/klm/util/stream/multi_progress.hh
@@ -0,0 +1,90 @@
+/* Progress bar suitable for chains of workers */
+#ifndef UTIL_MULTI_PROGRESS__
+#define UTIL_MULTI_PROGRESS__
+
+#include <boost/thread/mutex.hpp>
+
+#include <cstddef>
+
+#include <stdint.h>
+
+namespace util { namespace stream {
+
+class WorkerProgress;
+
+class MultiProgress {
+  public:
+    static const unsigned char kWidth = 100;
+
+    MultiProgress();
+
+    ~MultiProgress();
+
+    // Turns on showing (requires SetTarget too).
+    void Activate();
+
+    void SetTarget(uint64_t complete);
+
+    WorkerProgress Add();
+
+    void Finished();
+
+  private:
+    friend class WorkerProgress;
+    void Milestone(WorkerProgress &worker);
+
+    bool active_;
+
+    uint64_t complete_;
+
+    boost::mutex mutex_;
+
+    // \0 at the end.  
+    char display_[kWidth + 1];
+
+    std::size_t character_handout_;
+
+    MultiProgress(const MultiProgress &);
+    MultiProgress &operator=(const MultiProgress &);
+};
+
+class WorkerProgress {
+  public:
+    // Default contrutor must be initialized with operator= later.  
+    WorkerProgress() : parent_(NULL) {}
+
+    // Not threadsafe for the same worker by default.  
+    WorkerProgress &operator++() {
+      if (++current_ >= next_) {
+        parent_->Milestone(*this);
+      }
+      return *this;
+    }
+
+    WorkerProgress &operator+=(uint64_t amount) {
+      current_ += amount;
+      if (current_ >= next_) {
+        parent_->Milestone(*this);
+      }
+      return *this;
+    }
+
+  private:
+    friend class MultiProgress;
+    WorkerProgress(uint64_t next, MultiProgress &parent, char character) 
+      : current_(0), next_(next), parent_(&parent), stone_(0), character_(character) {}
+
+    uint64_t current_, next_;
+
+    MultiProgress *parent_;
+
+    // Previous milestone reached.  
+    unsigned char stone_;
+
+    // Character to display in bar.  
+    char character_;
+};
+
+}} // namespaces
+
+#endif // UTIL_MULTI_PROGRESS__
diff --git a/klm/util/stream/sort.hh b/klm/util/stream/sort.hh
new file mode 100644
index 00000000..16aa6a03
--- /dev/null
+++ b/klm/util/stream/sort.hh
@@ -0,0 +1,548 @@
+/* Usage:
+ * Sort<Compare> sorter(temp, compare);
+ * Chain(config) >> Read(file) >> sorter.Unsorted();
+ * Stream stream;
+ * Chain chain(config) >> sorter.Sorted(internal_config, lazy_config) >> stream;
+ * 
+ * Note that sorter must outlive any threads that use Unsorted or Sorted.  
+ *
+ * Combiners take the form:
+ * bool operator()(void *into, const void *option, const Compare &compare) const
+ * which returns true iff a combination happened.  The sorting algorithm
+ * guarantees compare(into, option).  But it does not guarantee 
+ * compare(option, into).  
+ * Currently, combining is only done in merge steps, not during on-the-fly
+ * sort.  Use a hash table for that.  
+ */
+
+#ifndef UTIL_STREAM_SORT__
+#define UTIL_STREAM_SORT__
+
+#include "util/stream/chain.hh"
+#include "util/stream/config.hh"
+#include "util/stream/io.hh"
+#include "util/stream/stream.hh"
+#include "util/stream/timer.hh"
+
+#include "util/file.hh"
+#include "util/scoped.hh"
+#include "util/sized_iterator.hh"
+
+#include <algorithm>
+#include <iostream>
+#include <queue>
+#include <string>
+
+namespace util {
+namespace stream {
+
+struct NeverCombine {
+  template <class Compare> bool operator()(const void *, const void *, const Compare &) const { 
+    return false;
+  }
+};
+
+// Manage the offsets of sorted blocks in a file.  
+class Offsets {
+  public:
+    explicit Offsets(int fd) : log_(fd) {
+      Reset();
+    }
+
+    int File() const { return log_; }
+
+    void Append(uint64_t length) {
+      if (!length) return;
+      ++block_count_;
+      if (length == cur_.length) {
+        ++cur_.run;
+        return;
+      }
+      WriteOrThrow(log_, &cur_, sizeof(Entry));
+      cur_.length = length;
+      cur_.run = 1;
+    }
+
+    void FinishedAppending() {
+      WriteOrThrow(log_, &cur_, sizeof(Entry));
+      SeekOrThrow(log_, sizeof(Entry)); // Skip 0,0 at beginning.
+      cur_.run = 0;
+      if (block_count_) {
+        ReadOrThrow(log_, &cur_, sizeof(Entry));
+        assert(cur_.length);
+        assert(cur_.run);
+      }
+    }
+
+    uint64_t RemainingBlocks() const { return block_count_; }
+
+    uint64_t TotalOffset() const { return output_sum_; }
+
+    uint64_t PeekSize() const {
+      return cur_.length;
+    }
+
+    uint64_t NextSize() {
+      assert(block_count_);
+      uint64_t ret = cur_.length;
+      output_sum_ += ret;
+
+      --cur_.run;
+      --block_count_;
+      if (!cur_.run && block_count_) {
+        ReadOrThrow(log_, &cur_, sizeof(Entry));
+        assert(cur_.length);
+        assert(cur_.run);
+      }
+      return ret;
+    }
+
+    void Reset() {
+      SeekOrThrow(log_, 0);
+      ResizeOrThrow(log_, 0);
+      cur_.length = 0;
+      cur_.run = 0;
+      block_count_ = 0;
+      output_sum_ = 0;
+    }
+
+  private:
+    int log_;
+
+    struct Entry {
+      uint64_t length;
+      uint64_t run;
+    };
+    Entry cur_;
+
+    uint64_t block_count_;
+
+    uint64_t output_sum_;
+};
+
+// A priority queue of entries backed by file buffers
+template <class Compare> class MergeQueue {
+  public:
+    MergeQueue(int fd, std::size_t buffer_size, std::size_t entry_size, const Compare &compare)
+      : queue_(Greater(compare)), in_(fd), buffer_size_(buffer_size), entry_size_(entry_size) {}
+
+    void Push(void *base, uint64_t offset, uint64_t amount) {
+      queue_.push(Entry(base, in_, offset, amount, buffer_size_));
+    }
+
+    const void *Top() const {
+      return queue_.top().Current();
+    }
+
+    void Pop() {
+      Entry top(queue_.top());
+      queue_.pop();
+      if (top.Increment(in_, buffer_size_, entry_size_))
+        queue_.push(top);
+    }
+
+    std::size_t Size() const {
+      return queue_.size();
+    }
+
+    bool Empty() const {
+      return queue_.empty();
+    }
+
+  private:
+    // Priority queue contains these entries.  
+    class Entry {
+      public:
+        Entry() {}
+
+        Entry(void *base, int fd, uint64_t offset, uint64_t amount, std::size_t buf_size) {
+          offset_ = offset;
+          remaining_ = amount;
+          buffer_end_ = static_cast<uint8_t*>(base) + buf_size;
+          Read(fd, buf_size);
+        }
+
+        bool Increment(int fd, std::size_t buf_size, std::size_t entry_size) {
+          current_ += entry_size;
+          if (current_ != buffer_end_) return true;
+          return Read(fd, buf_size);
+        }
+
+        const void *Current() const { return current_; }
+
+      private:
+        bool Read(int fd, std::size_t buf_size) {
+          current_ = buffer_end_ - buf_size;
+          std::size_t amount;
+          if (static_cast<uint64_t>(buf_size) < remaining_) {
+            amount = buf_size;
+          } else if (!remaining_) {
+            return false;
+          } else {
+            amount = remaining_;
+            buffer_end_ = current_ + remaining_;
+          }
+          PReadOrThrow(fd, current_, amount, offset_);
+          offset_ += amount;
+          assert(current_ <= buffer_end_);
+          remaining_ -= amount;
+          return true;
+        }
+
+        // Buffer
+        uint8_t *current_, *buffer_end_;
+        // File
+        uint64_t remaining_, offset_;
+    };
+
+    // Wrapper comparison function for queue entries.   
+    class Greater : public std::binary_function<const Entry &, const Entry &, bool> {
+      public:
+        explicit Greater(const Compare &compare) : compare_(compare) {}
+
+        bool operator()(const Entry &first, const Entry &second) const {
+          return compare_(second.Current(), first.Current());
+        }
+
+      private:
+        const Compare compare_;
+    };
+
+    typedef std::priority_queue<Entry, std::vector<Entry>, Greater> Queue;
+    Queue queue_;
+
+    const int in_;
+    const std::size_t buffer_size_;
+    const std::size_t entry_size_;
+};
+
+/* A worker object that merges.  If the number of pieces to merge exceeds the
+ * arity, it outputs multiple sorted blocks, recording to out_offsets.  
+ * However, users will only every see a single sorted block out output because
+ * Sort::Sorted insures the arity is higher than the number of pieces before
+ * returning this.   
+ */
+template <class Compare, class Combine> class MergingReader {
+  public:
+    MergingReader(int in, Offsets *in_offsets, Offsets *out_offsets, std::size_t buffer_size, std::size_t total_memory, const Compare &compare, const Combine &combine) :
+        compare_(compare), combine_(combine),
+        in_(in),
+        in_offsets_(in_offsets), out_offsets_(out_offsets),
+        buffer_size_(buffer_size), total_memory_(total_memory) {}
+
+    void Run(const ChainPosition &position) {
+      Run(position, false);
+    }
+
+    void Run(const ChainPosition &position, bool assert_one) {
+      // Special case: nothing to read.  
+      if (!in_offsets_->RemainingBlocks()) {
+        Link l(position);
+        l.Poison();
+        return;
+      }
+      // If there's just one entry, just read.
+      if (in_offsets_->RemainingBlocks() == 1) {
+        // Sequencing is important.
+        uint64_t offset = in_offsets_->TotalOffset();
+        uint64_t amount = in_offsets_->NextSize();
+        ReadSingle(offset, amount, position);
+        if (out_offsets_) out_offsets_->Append(amount);
+        return;
+      }
+
+      Stream str(position);
+      scoped_malloc buffer(MallocOrThrow(total_memory_));
+      uint8_t *const buffer_end = static_cast<uint8_t*>(buffer.get()) + total_memory_;
+
+      const std::size_t entry_size = position.GetChain().EntrySize();
+
+      while (in_offsets_->RemainingBlocks()) {
+        // Use bigger buffers if there's less remaining.
+        uint64_t per_buffer = static_cast<uint64_t>(std::max<std::size_t>(
+            buffer_size_,
+            static_cast<std::size_t>((static_cast<uint64_t>(total_memory_) / in_offsets_->RemainingBlocks()))));
+        per_buffer -= per_buffer % entry_size;
+        assert(per_buffer);
+
+        // Populate queue.
+        MergeQueue<Compare> queue(in_, per_buffer, entry_size, compare_);
+        for (uint8_t *buf = static_cast<uint8_t*>(buffer.get()); 
+            in_offsets_->RemainingBlocks() && (buf + std::min(per_buffer, in_offsets_->PeekSize()) <= buffer_end);) {
+          uint64_t offset = in_offsets_->TotalOffset();
+          uint64_t size = in_offsets_->NextSize();
+          queue.Push(buf, offset, size);
+          buf += static_cast<std::size_t>(std::min<uint64_t>(size, per_buffer));
+        }
+        // This shouldn't happen but it's probably better to die than loop indefinitely.
+        if (queue.Size() < 2 && in_offsets_->RemainingBlocks()) {
+          std::cerr << "Bug in sort implementation: not merging at least two stripes." << std::endl;
+          abort();
+        }
+        if (assert_one && in_offsets_->RemainingBlocks()) {
+          std::cerr << "Bug in sort implementation: should only be one merge group for lazy sort" << std::endl;
+          abort();
+        }
+
+        uint64_t written = 0;
+        // Merge including combiner support.  
+        memcpy(str.Get(), queue.Top(), entry_size);
+        for (queue.Pop(); !queue.Empty(); queue.Pop()) {
+          if (!combine_(str.Get(), queue.Top(), compare_)) {
+            ++written; ++str;
+            memcpy(str.Get(), queue.Top(), entry_size);
+          }
+        }
+        ++written; ++str;
+        if (out_offsets_)
+          out_offsets_->Append(written * entry_size);
+      }
+      str.Poison();
+    }
+
+  private: 
+    void ReadSingle(uint64_t offset, const uint64_t size, const ChainPosition &position) {
+      // Special case: only one to read.  
+      const uint64_t end = offset + size;
+      const uint64_t block_size = position.GetChain().BlockSize();
+      Link l(position);
+      for (; offset + block_size < end; ++l, offset += block_size) {
+        PReadOrThrow(in_, l->Get(), block_size, offset);
+        l->SetValidSize(block_size);
+      }
+      PReadOrThrow(in_, l->Get(), end - offset, offset);
+      l->SetValidSize(end - offset);
+      (++l).Poison();
+      return;
+    }
+   
+    Compare compare_;
+    Combine combine_;
+
+    int in_;
+
+  protected:
+    Offsets *in_offsets_;
+
+  private:
+    Offsets *out_offsets_;
+ 
+    std::size_t buffer_size_;
+    std::size_t total_memory_;
+};
+
+// The lazy step owns the remaining files.  This keeps track of them.  
+template <class Compare, class Combine> class OwningMergingReader : public MergingReader<Compare, Combine> {
+  private:
+    typedef MergingReader<Compare, Combine> P;
+  public:
+    OwningMergingReader(int data, const Offsets &offsets, std::size_t buffer, std::size_t lazy, const Compare &compare, const Combine &combine) 
+      : P(data, NULL, NULL, buffer, lazy, compare, combine),
+        data_(data),
+        offsets_(offsets) {}
+
+    void Run(const ChainPosition &position) {
+      P::in_offsets_ = &offsets_;
+      scoped_fd data(data_);
+      scoped_fd offsets_file(offsets_.File());
+      P::Run(position, true);
+    }
+
+  private:
+    int data_;
+    Offsets offsets_;
+};
+
+// Don't use this directly.  Worker that sorts blocks.   
+template <class Compare> class BlockSorter {
+  public:
+    BlockSorter(Offsets &offsets, const Compare &compare) :
+      offsets_(&offsets), compare_(compare) {}
+
+    void Run(const ChainPosition &position) {
+      const std::size_t entry_size = position.GetChain().EntrySize();
+      for (Link link(position); link; ++link) {
+        // Record the size of each block in a separate file.    
+        offsets_->Append(link->ValidSize());
+        void *end = static_cast<uint8_t*>(link->Get()) + link->ValidSize();
+#if defined(_WIN32) || defined(_WIN64)
+        std::stable_sort
+#else
+        std::sort
+#endif
+          (SizedIt(link->Get(), entry_size),
+           SizedIt(end, entry_size),
+           compare_);
+      }
+      offsets_->FinishedAppending();
+    }
+
+  private:
+    Offsets *offsets_;
+    SizedCompare<Compare> compare_;
+};
+
+class BadSortConfig : public Exception {
+  public:
+    BadSortConfig() throw() {}
+    ~BadSortConfig() throw() {}
+};
+
+template <class Compare, class Combine = NeverCombine> class Sort {
+  public:
+    Sort(Chain &in, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine())
+      : config_(config),
+        data_(MakeTemp(config.temp_prefix)),
+        offsets_file_(MakeTemp(config.temp_prefix)), offsets_(offsets_file_.get()),
+        compare_(compare), combine_(combine),
+        entry_size_(in.EntrySize()) {
+      UTIL_THROW_IF(!entry_size_, BadSortConfig, "Sorting entries of size 0");
+      // Make buffer_size a multiple of the entry_size.  
+      config_.buffer_size -= config_.buffer_size % entry_size_;
+      UTIL_THROW_IF(!config_.buffer_size, BadSortConfig, "Sort buffer too small");
+      UTIL_THROW_IF(config_.total_memory < config_.buffer_size * 4, BadSortConfig, "Sorting memory " << config_.total_memory << " is too small for four buffers (two read and two write).");
+      in >> BlockSorter<Compare>(offsets_, compare_) >> WriteAndRecycle(data_.get());
+    }
+
+    uint64_t Size() const {
+      return SizeOrThrow(data_.get());
+    }
+
+    // Do merge sort, terminating when lazy merge could be done with the
+    // specified memory.  Return the minimum memory necessary to do lazy merge.
+    std::size_t Merge(std::size_t lazy_memory) {
+      if (offsets_.RemainingBlocks() <= 1) return 0;
+      const uint64_t lazy_arity = std::max<uint64_t>(1, lazy_memory / config_.buffer_size);
+      uint64_t size = Size();
+      /* No overflow because
+       * offsets_.RemainingBlocks() * config_.buffer_size <= lazy_memory ||
+       * size < lazy_memory
+       */
+      if (offsets_.RemainingBlocks() <= lazy_arity || size <= static_cast<uint64_t>(lazy_memory))
+        return std::min<std::size_t>(size, offsets_.RemainingBlocks() * config_.buffer_size);
+
+      scoped_fd data2(MakeTemp(config_.temp_prefix));
+      int fd_in = data_.get(), fd_out = data2.get();
+      scoped_fd offsets2_file(MakeTemp(config_.temp_prefix));
+      Offsets offsets2(offsets2_file.get());
+      Offsets *offsets_in = &offsets_, *offsets_out = &offsets2;
+
+      // Double buffered writing.  
+      ChainConfig chain_config;
+      chain_config.entry_size = entry_size_;
+      chain_config.block_count = 2;
+      chain_config.total_memory = config_.buffer_size * 2;
+      Chain chain(chain_config);
+
+      while (offsets_in->RemainingBlocks() > lazy_arity) {
+        if (size <= static_cast<uint64_t>(lazy_memory)) break;
+        std::size_t reading_memory = config_.total_memory - 2 * config_.buffer_size;
+        if (size < static_cast<uint64_t>(reading_memory)) {
+          reading_memory = static_cast<std::size_t>(size);
+        }
+        SeekOrThrow(fd_in, 0);
+        chain >>
+          MergingReader<Compare, Combine>(
+              fd_in,
+              offsets_in, offsets_out,
+              config_.buffer_size,
+              reading_memory,
+              compare_, combine_) >>
+          WriteAndRecycle(fd_out);
+        chain.Wait();
+        offsets_out->FinishedAppending();
+        ResizeOrThrow(fd_in, 0);
+        offsets_in->Reset();
+        std::swap(fd_in, fd_out);
+        std::swap(offsets_in, offsets_out);
+        size = SizeOrThrow(fd_in);
+      }
+
+      SeekOrThrow(fd_in, 0);
+      if (fd_in == data2.get()) {
+        data_.reset(data2.release());
+        offsets_file_.reset(offsets2_file.release());
+        offsets_ = offsets2;
+      }
+      if (offsets_.RemainingBlocks() <= 1) return 0;
+      // No overflow because the while loop exited.
+      return std::min(size, offsets_.RemainingBlocks() * static_cast<uint64_t>(config_.buffer_size));
+    }
+
+    // Output to chain, using this amount of memory, maximum, for lazy merge
+    // sort.  
+    void Output(Chain &out, std::size_t lazy_memory) {
+      Merge(lazy_memory);
+      out.SetProgressTarget(Size());
+      out >> OwningMergingReader<Compare, Combine>(data_.get(), offsets_, config_.buffer_size, lazy_memory, compare_, combine_);
+      data_.release();
+      offsets_file_.release();
+    }
+
+    /* If a pipeline step is reading sorted input and writing to a different
+     * sort order, then there's a trade-off between using RAM to read lazily
+     * (avoiding copying the file) and using RAM to increase block size and, 
+     * therefore, decrease the number of merge sort passes in the next
+     * iteration.  
+     * 
+     * Merge sort takes log_{arity}(pieces) passes.  Thus, each time the chain
+     * block size is multiplied by arity, the number of output passes decreases
+     * by one.  Up to a constant, then, log_{arity}(chain) is the number of
+     * passes saved.  Chain simply divides the memory evenly over all blocks.
+     * 
+     * Lazy sort saves this many passes (up to a constant)
+     *   log_{arity}((memory-lazy)/block_count) + 1
+     * Non-lazy sort saves this many passes (up to the same constant):
+     *   log_{arity}(memory/block_count)
+     * Add log_{arity}(block_count) to both:
+     *   log_{arity}(memory-lazy) + 1 versus log_{arity}(memory)
+     * Take arity to the power of both sizes (arity > 1)
+     *   (memory - lazy)*arity versus memory
+     * Solve for lazy
+     *   lazy = memory * (arity - 1) / arity
+     */
+    std::size_t DefaultLazy() {
+      float arity = static_cast<float>(config_.total_memory / config_.buffer_size);
+      return static_cast<std::size_t>(static_cast<float>(config_.total_memory) * (arity - 1.0) / arity);
+    }
+
+    // Same as Output with default lazy memory setting.
+    void Output(Chain &out) {
+      Output(out, DefaultLazy());
+    }
+
+    // Completely merge sort and transfer ownership to the caller.
+    int StealCompleted() {
+      // Merge all the way.
+      Merge(0);
+      SeekOrThrow(data_.get(), 0);
+      offsets_file_.reset();
+      return data_.release();
+    }
+
+  private:
+    SortConfig config_;
+
+    scoped_fd data_;
+
+    scoped_fd offsets_file_;
+    Offsets offsets_;
+
+    const Compare compare_;
+    const Combine combine_;
+    const std::size_t entry_size_;
+};
+
+// returns bytes to be read on demand.  
+template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = NeverCombine()) {
+  Sort<Compare, Combine> sorter(chain, config, compare, combine);
+  chain.Wait(true);
+  uint64_t size = sorter.Size();
+  sorter.Output(chain);
+  return size;
+}
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_SORT__
diff --git a/klm/util/stream/sort_test.cc b/klm/util/stream/sort_test.cc
new file mode 100644
index 00000000..fd7705cd
--- /dev/null
+++ b/klm/util/stream/sort_test.cc
@@ -0,0 +1,62 @@
+#include "util/stream/sort.hh"
+
+#define BOOST_TEST_MODULE SortTest
+#include <boost/test/unit_test.hpp>
+
+#include <algorithm>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+struct CompareUInt64 : public std::binary_function<const void *, const void *, bool> {
+  bool operator()(const void *first, const void *second) const {
+    return *static_cast<const uint64_t*>(first) < *reinterpret_cast<const uint64_t*>(second);
+  }
+};
+
+const uint64_t kSize = 100000;
+
+struct Putter {
+  Putter(std::vector<uint64_t> &shuffled) : shuffled_(shuffled) {}
+
+  void Run(const ChainPosition &position) {
+    Stream put_shuffled(position);
+    for (uint64_t i = 0; i < shuffled_.size(); ++i, ++put_shuffled) {
+      *static_cast<uint64_t*>(put_shuffled.Get()) = shuffled_[i];
+    }
+    put_shuffled.Poison();
+  }
+  std::vector<uint64_t> &shuffled_;
+};
+
+BOOST_AUTO_TEST_CASE(FromShuffled) {
+  std::vector<uint64_t> shuffled;
+  shuffled.reserve(kSize);
+  for (uint64_t i = 0; i < kSize; ++i) {
+    shuffled.push_back(i);
+  }
+  std::random_shuffle(shuffled.begin(), shuffled.end());
+  
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 800;
+  config.block_count = 3;
+
+  SortConfig merge_config;
+  merge_config.temp_prefix = "sort_test_temp";
+  merge_config.buffer_size = 800;
+  merge_config.total_memory = 3300;
+
+  Chain chain(config);
+  chain >> Putter(shuffled);
+  BlockingSort(chain, merge_config, CompareUInt64(), NeverCombine());
+  Stream sorted;
+  chain >> sorted >> kRecycle;
+  for (uint64_t i = 0; i < kSize; ++i, ++sorted) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(sorted.Get()));
+  }
+  BOOST_CHECK(!sorted);
+}
+
+}}} // namespaces
diff --git a/klm/util/stream/stream.hh b/klm/util/stream/stream.hh
new file mode 100644
index 00000000..6ff45b82
--- /dev/null
+++ b/klm/util/stream/stream.hh
@@ -0,0 +1,74 @@
+#ifndef UTIL_STREAM_STREAM__
+#define UTIL_STREAM_STREAM__
+
+#include "util/stream/chain.hh"
+
+#include <boost/noncopyable.hpp>
+
+#include <assert.h>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+class Stream : boost::noncopyable {
+  public:
+    Stream() : current_(NULL), end_(NULL) {}
+
+    void Init(const ChainPosition &position) {
+      entry_size_ = position.GetChain().EntrySize();
+      block_size_ = position.GetChain().BlockSize();
+      block_it_.Init(position);
+      StartBlock();
+    }
+
+    explicit Stream(const ChainPosition &position) {
+      Init(position);
+    }
+
+    operator bool() const { return current_ != NULL; }
+    bool operator!() const { return current_ == NULL; }
+
+    const void *Get() const { return current_; }
+    void *Get() { return current_; }
+
+    void Poison() {
+      block_it_->SetValidSize(current_ - static_cast<uint8_t*>(block_it_->Get()));
+      ++block_it_;
+      block_it_.Poison();
+    }
+    
+    Stream &operator++() {
+      assert(*this);
+      assert(current_ < end_);
+      current_ += entry_size_;
+      if (current_ == end_) {
+        ++block_it_;
+        StartBlock();
+      }
+      return *this;
+    }
+
+  private:
+    void StartBlock() {
+      for (; block_it_ && !block_it_->ValidSize(); ++block_it_) {}
+      current_ = static_cast<uint8_t*>(block_it_->Get());
+      end_ = current_ + block_it_->ValidSize();
+    }
+
+    uint8_t *current_, *end_;
+
+    std::size_t entry_size_;
+    std::size_t block_size_;
+
+    Link block_it_;
+};
+
+inline Chain &operator>>(Chain &chain, Stream &stream) {
+  stream.Init(chain.Add());
+  return chain;
+}
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_STREAM__
diff --git a/klm/util/stream/stream_test.cc b/klm/util/stream/stream_test.cc
new file mode 100644
index 00000000..6575d50d
--- /dev/null
+++ b/klm/util/stream/stream_test.cc
@@ -0,0 +1,35 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/stream.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE StreamTest
+#include <boost/test/unit_test.hpp>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+BOOST_AUTO_TEST_CASE(StreamTest) {
+  scoped_fd in(MakeTemp("io_test_temp"));
+  for (uint64_t i = 0; i < 100000; ++i) {
+    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+  }
+  SeekOrThrow(in.get(), 0);
+
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 100;
+  config.block_count = 12;
+
+  Stream s;
+  Chain chain(config);
+  chain >> Read(in.get()) >> s >> kRecycle;
+  uint64_t i = 0;
+  for (; s; ++s, ++i) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get()));
+  }
+  BOOST_CHECK_EQUAL(100000ULL, i);
+}
+
+}}} // namespaces
diff --git a/klm/util/stream/timer.hh b/klm/util/stream/timer.hh
new file mode 100644
index 00000000..7e1a5885
--- /dev/null
+++ b/klm/util/stream/timer.hh
@@ -0,0 +1,16 @@
+#ifndef UTIL_STREAM_TIMER__
+#define UTIL_STREAM_TIMER__
+
+// Sorry Jon, this was adding library dependencies in Moses and people complained.
+
+/*#include <boost/version.hpp>
+
+#if BOOST_VERSION >= 104800
+#include <boost/timer/timer.hpp>
+#define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
+#else
+//#warning Using Boost older than 1.48. Timing information will not be available.*/
+#define UTIL_TIMER(str) 
+//#endif
+
+#endif // UTIL_STREAM_TIMER__
diff --git a/klm/util/string_piece.cc b/klm/util/string_piece.cc
new file mode 100644
index 00000000..ec394b96
--- /dev/null
+++ b/klm/util/string_piece.cc
@@ -0,0 +1,193 @@
+// Copyright 2004 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in string_piece.hh.
+
+#include "util/string_piece.hh"
+
+#include <algorithm>
+
+#include <limits.h>
+
+#ifndef HAVE_ICU
+
+typedef StringPiece::size_type size_type;
+
+void StringPiece::CopyToString(std::string* target) const {
+  target->assign(ptr_, length_);
+}
+
+size_type StringPiece::find(const StringPiece& s, size_type pos) const {
+  // Not sure why length_ < 0 was here since it's std::size_t.
+  if (/*length_ < 0 || */pos > static_cast<size_type>(length_))
+    return npos;
+
+  const char* result = std::search(ptr_ + pos, ptr_ + length_,
+                                   s.ptr_, s.ptr_ + s.length_);
+  const size_type xpos = result - ptr_;
+  return xpos + s.length_ <= length_ ? xpos : npos;
+}
+
+size_type StringPiece::find(char c, size_type pos) const {
+  if (length_ <= 0 || pos >= static_cast<size_type>(length_)) {
+    return npos;
+  }
+  const char* result = std::find(ptr_ + pos, ptr_ + length_, c);
+  return result != ptr_ + length_ ? result - ptr_ : npos;
+}
+
+size_type StringPiece::rfind(const StringPiece& s, size_type pos) const {
+  if (length_ < s.length_) return npos;
+  const size_t ulen = length_;
+  if (s.length_ == 0) return std::min(ulen, pos);
+
+  const char* last = ptr_ + std::min(ulen - s.length_, pos) + s.length_;
+  const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_);
+  return result != last ? result - ptr_ : npos;
+}
+
+size_type StringPiece::rfind(char c, size_type pos) const {
+  if (length_ <= 0) return npos;
+  for (int i = std::min(pos, static_cast<size_type>(length_ - 1));
+       i >= 0; --i) {
+    if (ptr_[i] == c) {
+      return i;
+    }
+  }
+  return npos;
+}
+
+// For each character in characters_wanted, sets the index corresponding
+// to the ASCII code of that character to 1 in table.  This is used by
+// the find_.*_of methods below to tell whether or not a character is in
+// the lookup table in constant time.
+// The argument `table' must be an array that is large enough to hold all
+// the possible values of an unsigned char.  Thus it should be be declared
+// as follows:
+//   bool table[UCHAR_MAX + 1]
+static inline void BuildLookupTable(const StringPiece& characters_wanted,
+                                    bool* table) {
+  const size_type length = characters_wanted.length();
+  const char* const data = characters_wanted.data();
+  for (size_type i = 0; i < length; ++i) {
+    table[static_cast<unsigned char>(data[i])] = true;
+  }
+}
+
+size_type StringPiece::find_first_of(const StringPiece& s,
+                                     size_type pos) const {
+  if (length_ == 0 || s.length_ == 0)
+    return npos;
+
+  // Avoid the cost of BuildLookupTable() for a single-character search.
+  if (s.length_ == 1)
+    return find_first_of(s.ptr_[0], pos);
+
+  bool lookup[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, lookup);
+  for (size_type i = pos; i < length_; ++i) {
+    if (lookup[static_cast<unsigned char>(ptr_[i])]) {
+      return i;
+    }
+  }
+  return npos;
+}
+
+size_type StringPiece::find_first_not_of(const StringPiece& s,
+                                         size_type pos) const {
+  if (length_ == 0)
+    return npos;
+
+  if (s.length_ == 0)
+    return 0;
+
+  // Avoid the cost of BuildLookupTable() for a single-character search.
+  if (s.length_ == 1)
+    return find_first_not_of(s.ptr_[0], pos);
+
+  bool lookup[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, lookup);
+  for (size_type i = pos; i < length_; ++i) {
+    if (!lookup[static_cast<unsigned char>(ptr_[i])]) {
+      return i;
+    }
+  }
+  return npos;
+}
+
+size_type StringPiece::find_first_not_of(char c, size_type pos) const {
+  if (length_ == 0)
+    return npos;
+
+  for (; pos < length_; ++pos) {
+    if (ptr_[pos] != c) {
+      return pos;
+    }
+  }
+  return npos;
+}
+
+size_type StringPiece::find_last_of(const StringPiece& s, size_type pos) const {
+  if (length_ == 0 || s.length_ == 0)
+    return npos;
+
+  // Avoid the cost of BuildLookupTable() for a single-character search.
+  if (s.length_ == 1)
+    return find_last_of(s.ptr_[0], pos);
+
+  bool lookup[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, lookup);
+  for (size_type i = std::min(pos, length_ - 1); ; --i) {
+    if (lookup[static_cast<unsigned char>(ptr_[i])])
+      return i;
+    if (i == 0)
+      break;
+  }
+  return npos;
+}
+
+size_type StringPiece::find_last_not_of(const StringPiece& s,
+                                        size_type pos) const {
+  if (length_ == 0)
+    return npos;
+
+  size_type i = std::min(pos, length_ - 1);
+  if (s.length_ == 0)
+    return i;
+
+  // Avoid the cost of BuildLookupTable() for a single-character search.
+  if (s.length_ == 1)
+    return find_last_not_of(s.ptr_[0], pos);
+
+  bool lookup[UCHAR_MAX + 1] = { false };
+  BuildLookupTable(s, lookup);
+  for (; ; --i) {
+    if (!lookup[static_cast<unsigned char>(ptr_[i])])
+      return i;
+    if (i == 0)
+      break;
+  }
+  return npos;
+}
+
+size_type StringPiece::find_last_not_of(char c, size_type pos) const {
+  if (length_ == 0)
+    return npos;
+
+  for (size_type i = std::min(pos, length_ - 1); ; --i) {
+    if (ptr_[i] != c)
+      return i;
+    if (i == 0)
+      break;
+  }
+  return npos;
+}
+
+StringPiece StringPiece::substr(size_type pos, size_type n) const {
+  if (pos > length_) pos = length_;
+  if (n > length_ - pos) n = length_ - pos;
+  return StringPiece(ptr_ + pos, n);
+}
+
+const size_type StringPiece::npos = size_type(-1);
+
+#endif // !HAVE_ICU
diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh
index be6a643d..9cf4c7f6 100644
--- a/klm/util/string_piece.hh
+++ b/klm/util/string_piece.hh
@@ -1,6 +1,6 @@
 /* If you use ICU in your program, then compile with -DHAVE_ICU -licui18n.  If
  * you don't use ICU, then this will use the Google implementation from Chrome.
- * This has been modified from the original version to let you choose.  
+ * This has been modified from the original version to let you choose.
  */
 
 // Copyright 2008, Google Inc.
@@ -50,10 +50,6 @@
 
 #include "util/have.hh"
 
-#ifdef HAVE_BOOST
-#include <boost/functional/hash/hash.hpp>
-#endif // HAVE_BOOST
-
 #include <cstring>
 #include <iosfwd>
 #include <ostream>
@@ -62,9 +58,9 @@
 #include <unicode/stringpiece.h>
 #include <unicode/uversion.h>
 
-// Old versions of ICU don't define operator== and operator!=.  
+// Old versions of ICU don't define operator== and operator!=.
 #if (U_ICU_VERSION_MAJOR_NUM < 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM < 4))
-#warning You are using an old version of ICU.  Consider upgrading to ICU >= 4.6.  
+#warning You are using an old version of ICU.  Consider upgrading to ICU >= 4.6.
 inline bool operator==(const StringPiece& x, const StringPiece& y) {
   if (x.size() != y.size())
     return false;
@@ -256,33 +252,9 @@ inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
   return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));
 }
 
-#ifdef HAVE_BOOST
-inline size_t hash_value(const StringPiece &str) {
-  return boost::hash_range(str.data(), str.data() + str.length());
-}
-
-/* Support for lookup of StringPiece in boost::unordered_map<std::string> */
-struct StringPieceCompatibleHash : public std::unary_function<const StringPiece &, size_t> {
-  size_t operator()(const StringPiece &str) const {
-    return hash_value(str);
-  }
-};
-
-struct StringPieceCompatibleEquals : public std::binary_function<const StringPiece &, const std::string &, bool> {
-  bool operator()(const StringPiece &first, const StringPiece &second) const {
-    return first == second;
-  }
-};
-template <class T> typename T::const_iterator FindStringPiece(const T &t, const StringPiece &key) {
-  return t.find(key, StringPieceCompatibleHash(), StringPieceCompatibleEquals());
-}
-template <class T> typename T::iterator FindStringPiece(T &t, const StringPiece &key) {
-  return t.find(key, StringPieceCompatibleHash(), StringPieceCompatibleEquals());
-}
-#endif
-
 #ifdef HAVE_ICU
 U_NAMESPACE_END
+using U_NAMESPACE_QUALIFIER StringPiece;
 #endif
 
 #endif  // BASE_STRING_PIECE_H__
diff --git a/klm/util/string_piece_hash.hh b/klm/util/string_piece_hash.hh
new file mode 100644
index 00000000..f206b1d8
--- /dev/null
+++ b/klm/util/string_piece_hash.hh
@@ -0,0 +1,43 @@
+#ifndef UTIL_STRING_PIECE_HASH__
+#define UTIL_STRING_PIECE_HASH__
+
+#include "util/string_piece.hh"
+
+#include <boost/functional/hash.hpp>
+#include <boost/version.hpp>
+
+inline size_t hash_value(const StringPiece &str) {
+  return boost::hash_range(str.data(), str.data() + str.length());
+}
+
+/* Support for lookup of StringPiece in boost::unordered_map<std::string> */
+struct StringPieceCompatibleHash : public std::unary_function<const StringPiece &, size_t> {
+  size_t operator()(const StringPiece &str) const {
+    return hash_value(str);
+  }
+};
+
+struct StringPieceCompatibleEquals : public std::binary_function<const StringPiece &, const std::string &, bool> {
+  bool operator()(const StringPiece &first, const StringPiece &second) const {
+    return first == second;
+  }
+};
+template <class T> typename T::const_iterator FindStringPiece(const T &t, const StringPiece &key) {
+#if BOOST_VERSION < 104200
+  std::string temp(key.data(), key.size());
+  return t.find(temp);
+#else
+  return t.find(key, StringPieceCompatibleHash(), StringPieceCompatibleEquals());
+#endif
+}
+
+template <class T> typename T::iterator FindStringPiece(T &t, const StringPiece &key) {
+#if BOOST_VERSION < 104200
+  std::string temp(key.data(), key.size());
+  return t.find(temp);
+#else
+  return t.find(key, StringPieceCompatibleHash(), StringPieceCompatibleEquals());
+#endif
+}
+
+#endif // UTIL_STRING_PIECE_HASH__
diff --git a/klm/util/thread_pool.hh b/klm/util/thread_pool.hh
new file mode 100644
index 00000000..84e257ea
--- /dev/null
+++ b/klm/util/thread_pool.hh
@@ -0,0 +1,95 @@
+#ifndef UTIL_THREAD_POOL__
+#define UTIL_THREAD_POOL__
+
+#include "util/pcqueue.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/optional.hpp>
+#include <boost/thread.hpp>
+
+#include <iostream>
+
+#include <stdlib.h>
+
+namespace util {
+
+template <class HandlerT> class Worker : boost::noncopyable {
+  public:
+    typedef HandlerT Handler;
+    typedef typename Handler::Request Request;
+
+    template <class Construct> Worker(PCQueue<Request> &in, Construct &construct, Request &poison)
+      : in_(in), handler_(construct), thread_(boost::ref(*this)), poison_(poison) {}
+
+    // Only call from thread.
+    void operator()() {
+      Request request;
+      while (1) {
+        in_.Consume(request);
+        if (request == poison_) return;
+        try {
+          (*handler_)(request);
+        }
+        catch(std::exception &e) {
+          std::cerr << "Handler threw " << e.what() << std::endl;
+          abort();
+        }
+        catch(...) {
+          std::cerr << "Handler threw an exception, dropping request" << std::endl;
+          abort();
+        }
+      }
+    }
+
+    void Join() {
+      thread_.join();
+    }
+
+  private:
+    PCQueue<Request> &in_;
+
+    boost::optional<Handler> handler_;
+
+    boost::thread thread_;
+
+    Request poison_;
+};
+
+template <class HandlerT> class ThreadPool : boost::noncopyable {
+  public:
+    typedef HandlerT Handler;
+    typedef typename Handler::Request Request;
+
+    template <class Construct> ThreadPool(size_t queue_length, size_t workers, Construct handler_construct, Request poison) : in_(queue_length), poison_(poison) {
+      for (size_t i = 0; i < workers; ++i) {
+        workers_.push_back(new Worker<Handler>(in_, handler_construct, poison));
+      }
+    }
+
+    ~ThreadPool() {
+      for (size_t i = 0; i < workers_.size(); ++i) {
+        Produce(poison_);
+      }
+      for (typename boost::ptr_vector<Worker<Handler> >::iterator i = workers_.begin(); i != workers_.end(); ++i) {
+        i->Join();
+      }
+    }
+
+    void Produce(const Request &request) {
+      in_.Produce(request);
+    }
+
+    // For adding to the queue.
+    PCQueue<Request> &In() { return in_; }
+
+  private:
+    PCQueue<Request> in_;
+
+    boost::ptr_vector<Worker<Handler> > workers_;
+
+    Request poison_;
+};
+
+} // namespace util
+
+#endif // UTIL_THREAD_POOL__
diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh
index c7e1c863..a588c3fc 100644
--- a/klm/util/tokenize_piece.hh
+++ b/klm/util/tokenize_piece.hh
@@ -20,6 +20,7 @@ class OutOfTokens : public Exception {
 
 class SingleCharacter {
   public:
+    SingleCharacter() {}
     explicit SingleCharacter(char delim) : delim_(delim) {}
 
     StringPiece Find(const StringPiece &in) const {
@@ -32,6 +33,8 @@ class SingleCharacter {
 
 class MultiCharacter {
   public:
+    MultiCharacter() {}
+
     explicit MultiCharacter(const StringPiece &delimiter) : delimiter_(delimiter) {}
 
     StringPiece Find(const StringPiece &in) const {
@@ -44,6 +47,7 @@ class MultiCharacter {
 
 class AnyCharacter {
   public:
+    AnyCharacter() {}
     explicit AnyCharacter(const StringPiece &chars) : chars_(chars) {}
 
     StringPiece Find(const StringPiece &in) const {
@@ -54,6 +58,20 @@ class AnyCharacter {
     StringPiece chars_;
 };
 
+class AnyCharacterLast {
+  public:
+    AnyCharacterLast() {}
+
+    explicit AnyCharacterLast(const StringPiece &chars) : chars_(chars) {}
+
+    StringPiece Find(const StringPiece &in) const {
+      return StringPiece(std::find_end(in.data(), in.data() + in.size(), chars_.data(), chars_.data() + chars_.size()), 1);
+    }
+
+  private:
+    StringPiece chars_;
+};
+
 template <class Find, bool SkipEmpty = false> class TokenIter : public boost::iterator_facade<TokenIter<Find, SkipEmpty>, const StringPiece, boost::forward_traversal_tag> {
   public:
     TokenIter() {}
@@ -69,8 +87,8 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
       return current_.data() != 0;
     }
 
-    static TokenIter<Find> end() {
-      return TokenIter<Find>();
+    static TokenIter<Find, SkipEmpty> end() {
+      return TokenIter<Find, SkipEmpty>();
     }
 
   private:
@@ -88,8 +106,8 @@ template <class Find, bool SkipEmpty = false> class TokenIter : public boost::it
       } while (SkipEmpty && current_.data() && current_.empty()); // Compiler should optimize this away if SkipEmpty is false.  
     }
 
-    bool equal(const TokenIter<Find> &other) const {
-      return after_.data() == other.after_.data();
+    bool equal(const TokenIter<Find, SkipEmpty> &other) const {
+      return current_.data() == other.current_.data();
     }
 
     const StringPiece &dereference() const {
diff --git a/klm/util/usage.cc b/klm/util/usage.cc
index e5cf76f0..b8e125d0 100644
--- a/klm/util/usage.cc
+++ b/klm/util/usage.cc
@@ -1,13 +1,17 @@
 #include "util/usage.hh"
 
+#include "util/exception.hh"
+
 #include <fstream>
 #include <ostream>
+#include <sstream>
 
 #include <string.h>
 #include <ctype.h>
 #if !defined(_WIN32) && !defined(_WIN64)
 #include <sys/resource.h>
 #include <sys/time.h>
+#include <unistd.h>
 #endif
 
 namespace util {
@@ -43,4 +47,60 @@ void PrintUsage(std::ostream &out) {
 #endif
 }
 
+uint64_t GuessPhysicalMemory() {
+#if defined(_WIN32) || defined(_WIN64)
+  return 0;
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+  long pages = sysconf(_SC_PHYS_PAGES);
+  if (pages == -1) return 0;
+  long page_size = sysconf(_SC_PAGESIZE);
+  if (page_size == -1) return 0;
+  return static_cast<uint64_t>(pages) * static_cast<uint64_t>(page_size);
+#else
+  return 0;
+#endif
+}
+
+namespace {
+class SizeParseError : public Exception {
+  public:
+    explicit SizeParseError(const std::string &str) throw() {
+      *this << "Failed to parse " << str << " into a memory size ";
+    }
+};
+
+template <class Num> uint64_t ParseNum(const std::string &arg) {
+  std::stringstream stream(arg);
+  Num value;
+  stream >> value;
+  UTIL_THROW_IF_ARG(!stream, SizeParseError, (arg), "for the leading number.");
+  std::string after;
+  stream >> after;
+  UTIL_THROW_IF_ARG(after.size() > 1, SizeParseError, (arg), "because there are more than two characters after the number.");
+  std::string throwaway;
+  UTIL_THROW_IF_ARG(stream >> throwaway, SizeParseError, (arg), "because there was more cruft " << throwaway << " after the number.");
+
+  // Silly sort, using kilobytes as your default unit.  
+  if (after.empty()) after = "K";
+  if (after == "%") {
+    uint64_t mem = GuessPhysicalMemory();
+    UTIL_THROW_IF_ARG(!mem, SizeParseError, (arg), "because % was specified but the physical memory size could not be determined.");
+    return static_cast<double>(value) * static_cast<double>(mem) / 100.0;
+  }
+  
+  std::string units("bKMGTPEZY");
+  std::string::size_type index = units.find(after[0]);
+  UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");
+  for (std::string::size_type i = 0; i < index; ++i) {
+    value *= 1024;
+  }
+  return value;
+}
+
+} // namespace
+
+uint64_t ParseSize(const std::string &arg) {
+  return arg.find('.') == std::string::npos ? ParseNum<double>(arg) : ParseNum<uint64_t>(arg);
+}
+
 } // namespace util
diff --git a/klm/util/usage.hh b/klm/util/usage.hh
index d331ff74..e19eda7b 100644
--- a/klm/util/usage.hh
+++ b/klm/util/usage.hh
@@ -1,8 +1,18 @@
 #ifndef UTIL_USAGE__
 #define UTIL_USAGE__
+#include <cstddef>
 #include <iosfwd>
+#include <string>
+
+#include <stdint.h>
 
 namespace util {
 void PrintUsage(std::ostream &to);
+
+// Determine how much physical memory there is.  Return 0 on failure.
+uint64_t GuessPhysicalMemory();
+
+// Parse a size like unix sort.  Sadly, this means the default multiplier is K.
+uint64_t ParseSize(const std::string &arg);
 } // namespace util
 #endif // UTIL_USAGE__