58 files changed, 8373 insertions, 107 deletions
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am
index 3ab7560f..294ebc0a 100644
--- a/klm/util/Makefile.am
+++ b/klm/util/Makefile.am
@@ -29,6 +29,7 @@ libklm_util_a_SOURCES = \
   joint_sort.hh \
   mmap.hh \
   murmur_hash.hh \
+  pcqueue.hh \
   pool.hh \
   probing_hash_table.hh \
   proxy_iterator.hh \
@@ -37,6 +38,7 @@ libklm_util_a_SOURCES = \
   sized_iterator.hh \
   sorted_uniform.hh \
   string_piece.hh \
+  thread_pool.hh \
   tokenize_piece.hh \
   usage.hh \
   ersatz_progress.cc \
@@ -48,7 +50,8 @@ libklm_util_a_SOURCES = \
   murmur_hash.cc \
   pool.cc \
 	read_compressed.cc \
+  scoped.cc \
   string_piece.cc \
 	usage.cc
 
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm -I$(top_srcdir)/klm/util/double-conversion
diff --git a/klm/util/double-conversion/LICENSE b/klm/util/double-conversion/LICENSE
new file mode 100644
index 00000000..933718a9
--- /dev/null
+++ b/klm/util/double-conversion/LICENSE
@@ -0,0 +1,26 @@
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/klm/util/double-conversion/Makefile.am b/klm/util/double-conversion/Makefile.am
new file mode 100644
index 00000000..eb6616f7
--- /dev/null
+++ b/klm/util/double-conversion/Makefile.am
@@ -0,0 +1,23 @@
+noinst_LIBRARIES = libklm_util_double.a
+
+libklm_util_double_a_SOURCES = \
+  bignum-dtoa.h \
+  bignum.h \
+  cached-powers.h \
+  diy-fp.h \
+  double-conversion.h \
+  fast-dtoa.h \
+  fixed-dtoa.h \
+  ieee.h \
+  strtod.h \
+  utils.h \
+  bignum.cc \
+  bignum-dtoa.cc \
+  cached-powers.cc \
+  diy-fp.cc \
+  double-conversion.cc \
+  fast-dtoa.cc \
+  fixed-dtoa.cc \
+  strtod.cc
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/klm -I$(top_srcdir)/klm/util/double-conversion
diff --git a/klm/util/double-conversion/bignum-dtoa.cc b/klm/util/double-conversion/bignum-dtoa.cc
new file mode 100644
index 00000000..b6c2e85d
--- /dev/null
+++ b/klm/util/double-conversion/bignum-dtoa.cc
@@ -0,0 +1,640 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <math.h>
+
+#include "bignum-dtoa.h"
+
+#include "bignum.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+static int NormalizedExponent(uint64_t significand, int exponent) {
+  ASSERT(significand != 0);
+  while ((significand & Double::kHiddenBit) == 0) {
+    significand = significand << 1;
+    exponent = exponent - 1;
+  }
+  return exponent;
+}
+
+
+// Forward declarations:
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k.
+static int EstimatePower(int exponent);
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator.
+static void InitialScaledStartValues(uint64_t significand,
+                                     int exponent,
+                                     bool lower_boundary_is_closer,
+                                     int estimated_power,
+                                     bool need_boundary_deltas,
+                                     Bignum* numerator,
+                                     Bignum* denominator,
+                                     Bignum* delta_minus,
+                                     Bignum* delta_plus);
+// Multiplies numerator/denominator so that its values lies in the range 1-10.
+// Returns decimal_point s.t.
+//  v = numerator'/denominator' * 10^(decimal_point-1)
+//     where numerator' and denominator' are the values of numerator and
+//     denominator after the call to this function.
+static void FixupMultiply10(int estimated_power, bool is_even,
+                            int* decimal_point,
+                            Bignum* numerator, Bignum* denominator,
+                            Bignum* delta_minus, Bignum* delta_plus);
+// Generates digits from the left to the right and stops when the generated
+// digits yield the shortest decimal representation of v.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+                                   Bignum* delta_minus, Bignum* delta_plus,
+                                   bool is_even,
+                                   Vector<char> buffer, int* length);
+// Generates 'requested_digits' after the decimal point.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+                          Bignum* numerator, Bignum* denominator,
+                          Vector<char>(buffer), int* length);
+// Generates 'count' digits of numerator/denominator.
+// Once 'count' digits have been produced rounds the result depending on the
+// remainder (remainders of exactly .5 round upwards). Might update the
+// decimal_point when rounding up (for example for 0.9999).
+static void GenerateCountedDigits(int count, int* decimal_point,
+                                  Bignum* numerator, Bignum* denominator,
+                                  Vector<char>(buffer), int* length);
+
+
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+                Vector<char> buffer, int* length, int* decimal_point) {
+  ASSERT(v > 0);
+  ASSERT(!Double(v).IsSpecial());
+  uint64_t significand;
+  int exponent;
+  bool lower_boundary_is_closer;
+  if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) {
+    float f = static_cast<float>(v);
+    ASSERT(f == v);
+    significand = Single(f).Significand();
+    exponent = Single(f).Exponent();
+    lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser();
+  } else {
+    significand = Double(v).Significand();
+    exponent = Double(v).Exponent();
+    lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser();
+  }
+  bool need_boundary_deltas =
+      (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE);
+
+  bool is_even = (significand & 1) == 0;
+  int normalized_exponent = NormalizedExponent(significand, exponent);
+  // estimated_power might be too low by 1.
+  int estimated_power = EstimatePower(normalized_exponent);
+
+  // Shortcut for Fixed.
+  // The requested digits correspond to the digits after the point. If the
+  // number is much too small, then there is no need in trying to get any
+  // digits.
+  if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) {
+    buffer[0] = '\0';
+    *length = 0;
+    // Set decimal-point to -requested_digits. This is what Gay does.
+    // Note that it should not have any effect anyways since the string is
+    // empty.
+    *decimal_point = -requested_digits;
+    return;
+  }
+
+  Bignum numerator;
+  Bignum denominator;
+  Bignum delta_minus;
+  Bignum delta_plus;
+  // Make sure the bignum can grow large enough. The smallest double equals
+  // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
+  // The maximum double is 1.7976931348623157e308 which needs fewer than
+  // 308*4 binary digits.
+  ASSERT(Bignum::kMaxSignificantBits >= 324*4);
+  InitialScaledStartValues(significand, exponent, lower_boundary_is_closer,
+                           estimated_power, need_boundary_deltas,
+                           &numerator, &denominator,
+                           &delta_minus, &delta_plus);
+  // We now have v = (numerator / denominator) * 10^estimated_power.
+  FixupMultiply10(estimated_power, is_even, decimal_point,
+                  &numerator, &denominator,
+                  &delta_minus, &delta_plus);
+  // We now have v = (numerator / denominator) * 10^(decimal_point-1), and
+  //  1 <= (numerator + delta_plus) / denominator < 10
+  switch (mode) {
+    case BIGNUM_DTOA_SHORTEST:
+    case BIGNUM_DTOA_SHORTEST_SINGLE:
+      GenerateShortestDigits(&numerator, &denominator,
+                             &delta_minus, &delta_plus,
+                             is_even, buffer, length);
+      break;
+    case BIGNUM_DTOA_FIXED:
+      BignumToFixed(requested_digits, decimal_point,
+                    &numerator, &denominator,
+                    buffer, length);
+      break;
+    case BIGNUM_DTOA_PRECISION:
+      GenerateCountedDigits(requested_digits, decimal_point,
+                            &numerator, &denominator,
+                            buffer, length);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  buffer[*length] = '\0';
+}
+
+
+// The procedure starts generating digits from the left to the right and stops
+// when the generated digits yield the shortest decimal representation of v. A
+// decimal representation of v is a number lying closer to v than to any other
+// double, so it converts to v when read.
+//
+// This is true if d, the decimal representation, is between m- and m+, the
+// upper and lower boundaries. d must be strictly between them if !is_even.
+//           m- := (numerator - delta_minus) / denominator
+//           m+ := (numerator + delta_plus) / denominator
+//
+// Precondition: 0 <= (numerator+delta_plus) / denominator < 10.
+//   If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit
+//   will be produced. This should be the standard precondition.
+static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator,
+                                   Bignum* delta_minus, Bignum* delta_plus,
+                                   bool is_even,
+                                   Vector<char> buffer, int* length) {
+  // Small optimization: if delta_minus and delta_plus are the same just reuse
+  // one of the two bignums.
+  if (Bignum::Equal(*delta_minus, *delta_plus)) {
+    delta_plus = delta_minus;
+  }
+  *length = 0;
+  while (true) {
+    uint16_t digit;
+    digit = numerator->DivideModuloIntBignum(*denominator);
+    ASSERT(digit <= 9);  // digit is a uint16_t and therefore always positive.
+    // digit = numerator / denominator (integer division).
+    // numerator = numerator % denominator.
+    buffer[(*length)++] = digit + '0';
+
+    // Can we stop already?
+    // If the remainder of the division is less than the distance to the lower
+    // boundary we can stop. In this case we simply round down (discarding the
+    // remainder).
+    // Similarly we test if we can round up (using the upper boundary).
+    bool in_delta_room_minus;
+    bool in_delta_room_plus;
+    if (is_even) {
+      in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus);
+    } else {
+      in_delta_room_minus = Bignum::Less(*numerator, *delta_minus);
+    }
+    if (is_even) {
+      in_delta_room_plus =
+          Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+    } else {
+      in_delta_room_plus =
+          Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+    }
+    if (!in_delta_room_minus && !in_delta_room_plus) {
+      // Prepare for next iteration.
+      numerator->Times10();
+      delta_minus->Times10();
+      // We optimized delta_plus to be equal to delta_minus (if they share the
+      // same value). So don't multiply delta_plus if they point to the same
+      // object.
+      if (delta_minus != delta_plus) {
+        delta_plus->Times10();
+      }
+    } else if (in_delta_room_minus && in_delta_room_plus) {
+      // Let's see if 2*numerator < denominator.
+      // If yes, then the next digit would be < 5 and we can round down.
+      int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator);
+      if (compare < 0) {
+        // Remaining digits are less than .5. -> Round down (== do nothing).
+      } else if (compare > 0) {
+        // Remaining digits are more than .5 of denominator. -> Round up.
+        // Note that the last digit could not be a '9' as otherwise the whole
+        // loop would have stopped earlier.
+        // We still have an assert here in case the preconditions were not
+        // satisfied.
+        ASSERT(buffer[(*length) - 1] != '9');
+        buffer[(*length) - 1]++;
+      } else {
+        // Halfway case.
+        // TODO(floitsch): need a way to solve half-way cases.
+        //   For now let's round towards even (since this is what Gay seems to
+        //   do).
+
+        if ((buffer[(*length) - 1] - '0') % 2 == 0) {
+          // Round down => Do nothing.
+        } else {
+          ASSERT(buffer[(*length) - 1] != '9');
+          buffer[(*length) - 1]++;
+        }
+      }
+      return;
+    } else if (in_delta_room_minus) {
+      // Round down (== do nothing).
+      return;
+    } else {  // in_delta_room_plus
+      // Round up.
+      // Note again that the last digit could not be '9' since this would have
+      // stopped the loop earlier.
+      // We still have an ASSERT here, in case the preconditions were not
+      // satisfied.
+      ASSERT(buffer[(*length) -1] != '9');
+      buffer[(*length) - 1]++;
+      return;
+    }
+  }
+}
+
+
+// Let v = numerator / denominator < 10.
+// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point)
+// from left to right. Once 'count' digits have been produced we decide wether
+// to round up or down. Remainders of exactly .5 round upwards. Numbers such
+// as 9.999999 propagate a carry all the way, and change the
+// exponent (decimal_point), when rounding upwards.
+static void GenerateCountedDigits(int count, int* decimal_point,
+                                  Bignum* numerator, Bignum* denominator,
+                                  Vector<char>(buffer), int* length) {
+  ASSERT(count >= 0);
+  for (int i = 0; i < count - 1; ++i) {
+    uint16_t digit;
+    digit = numerator->DivideModuloIntBignum(*denominator);
+    ASSERT(digit <= 9);  // digit is a uint16_t and therefore always positive.
+    // digit = numerator / denominator (integer division).
+    // numerator = numerator % denominator.
+    buffer[i] = digit + '0';
+    // Prepare for next iteration.
+    numerator->Times10();
+  }
+  // Generate the last digit.
+  uint16_t digit;
+  digit = numerator->DivideModuloIntBignum(*denominator);
+  if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+    digit++;
+  }
+  buffer[count - 1] = digit + '0';
+  // Correct bad digits (in case we had a sequence of '9's). Propagate the
+  // carry until we hat a non-'9' or til we reach the first digit.
+  for (int i = count - 1; i > 0; --i) {
+    if (buffer[i] != '0' + 10) break;
+    buffer[i] = '0';
+    buffer[i - 1]++;
+  }
+  if (buffer[0] == '0' + 10) {
+    // Propagate a carry past the top place.
+    buffer[0] = '1';
+    (*decimal_point)++;
+  }
+  *length = count;
+}
+
+
+// Generates 'requested_digits' after the decimal point. It might omit
+// trailing '0's. If the input number is too small then no digits at all are
+// generated (ex.: 2 fixed digits for 0.00001).
+//
+// Input verifies:  1 <= (numerator + delta) / denominator < 10.
+static void BignumToFixed(int requested_digits, int* decimal_point,
+                          Bignum* numerator, Bignum* denominator,
+                          Vector<char>(buffer), int* length) {
+  // Note that we have to look at more than just the requested_digits, since
+  // a number could be rounded up. Example: v=0.5 with requested_digits=0.
+  // Even though the power of v equals 0 we can't just stop here.
+  if (-(*decimal_point) > requested_digits) {
+    // The number is definitively too small.
+    // Ex: 0.001 with requested_digits == 1.
+    // Set decimal-point to -requested_digits. This is what Gay does.
+    // Note that it should not have any effect anyways since the string is
+    // empty.
+    *decimal_point = -requested_digits;
+    *length = 0;
+    return;
+  } else if (-(*decimal_point) == requested_digits) {
+    // We only need to verify if the number rounds down or up.
+    // Ex: 0.04 and 0.06 with requested_digits == 1.
+    ASSERT(*decimal_point == -requested_digits);
+    // Initially the fraction lies in range (1, 10]. Multiply the denominator
+    // by 10 so that we can compare more easily.
+    denominator->Times10();
+    if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) {
+      // If the fraction is >= 0.5 then we have to include the rounded
+      // digit.
+      buffer[0] = '1';
+      *length = 1;
+      (*decimal_point)++;
+    } else {
+      // Note that we caught most of similar cases earlier.
+      *length = 0;
+    }
+    return;
+  } else {
+    // The requested digits correspond to the digits after the point.
+    // The variable 'needed_digits' includes the digits before the point.
+    int needed_digits = (*decimal_point) + requested_digits;
+    GenerateCountedDigits(needed_digits, decimal_point,
+                          numerator, denominator,
+                          buffer, length);
+  }
+}
+
+
+// Returns an estimation of k such that 10^(k-1) <= v < 10^k where
+// v = f * 2^exponent and 2^52 <= f < 2^53.
+// v is hence a normalized double with the given exponent. The output is an
+// approximation for the exponent of the decimal approimation .digits * 10^k.
+//
+// The result might undershoot by 1 in which case 10^k <= v < 10^k+1.
+// Note: this property holds for v's upper boundary m+ too.
+//    10^k <= m+ < 10^k+1.
+//   (see explanation below).
+//
+// Examples:
+//  EstimatePower(0)   => 16
+//  EstimatePower(-52) => 0
+//
+// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0.
+static int EstimatePower(int exponent) {
+  // This function estimates log10 of v where v = f*2^e (with e == exponent).
+  // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)).
+  // Note that f is bounded by its container size. Let p = 53 (the double's
+  // significand size). Then 2^(p-1) <= f < 2^p.
+  //
+  // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close
+  // to log2(v) the function is simplified to (e+(len(f)-1)/log2(10)).
+  // The computed number undershoots by less than 0.631 (when we compute log3
+  // and not log10).
+  //
+  // Optimization: since we only need an approximated result this computation
+  // can be performed on 64 bit integers. On x86/x64 architecture the speedup is
+  // not really measurable, though.
+  //
+  // Since we want to avoid overshooting we decrement by 1e10 so that
+  // floating-point imprecisions don't affect us.
+  //
+  // Explanation for v's boundary m+: the computation takes advantage of
+  // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement
+  // (even for denormals where the delta can be much more important).
+
+  const double k1Log10 = 0.30102999566398114;  // 1/lg(10)
+
+  // For doubles len(f) == 53 (don't forget the hidden bit).
+  const int kSignificandSize = Double::kSignificandSize;
+  double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10);
+  return static_cast<int>(estimate);
+}
+
+
+// See comments for InitialScaledStartValues.
+static void InitialScaledStartValuesPositiveExponent(
+    uint64_t significand, int exponent,
+    int estimated_power, bool need_boundary_deltas,
+    Bignum* numerator, Bignum* denominator,
+    Bignum* delta_minus, Bignum* delta_plus) {
+  // A positive exponent implies a positive power.
+  ASSERT(estimated_power >= 0);
+  // Since the estimated_power is positive we simply multiply the denominator
+  // by 10^estimated_power.
+
+  // numerator = v.
+  numerator->AssignUInt64(significand);
+  numerator->ShiftLeft(exponent);
+  // denominator = 10^estimated_power.
+  denominator->AssignPowerUInt16(10, estimated_power);
+
+  if (need_boundary_deltas) {
+    // Introduce a common denominator so that the deltas to the boundaries are
+    // integers.
+    denominator->ShiftLeft(1);
+    numerator->ShiftLeft(1);
+    // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+    // denominator (of 2) delta_plus equals 2^e.
+    delta_plus->AssignUInt16(1);
+    delta_plus->ShiftLeft(exponent);
+    // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+    delta_minus->AssignUInt16(1);
+    delta_minus->ShiftLeft(exponent);
+  }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentPositivePower(
+    uint64_t significand, int exponent,
+    int estimated_power, bool need_boundary_deltas,
+    Bignum* numerator, Bignum* denominator,
+    Bignum* delta_minus, Bignum* delta_plus) {
+  // v = f * 2^e with e < 0, and with estimated_power >= 0.
+  // This means that e is close to 0 (have a look at how estimated_power is
+  // computed).
+
+  // numerator = significand
+  //  since v = significand * 2^exponent this is equivalent to
+  //  numerator = v * / 2^-exponent
+  numerator->AssignUInt64(significand);
+  // denominator = 10^estimated_power * 2^-exponent (with exponent < 0)
+  denominator->AssignPowerUInt16(10, estimated_power);
+  denominator->ShiftLeft(-exponent);
+
+  if (need_boundary_deltas) {
+    // Introduce a common denominator so that the deltas to the boundaries are
+    // integers.
+    denominator->ShiftLeft(1);
+    numerator->ShiftLeft(1);
+    // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common
+    // denominator (of 2) delta_plus equals 2^e.
+    // Given that the denominator already includes v's exponent the distance
+    // to the boundaries is simply 1.
+    delta_plus->AssignUInt16(1);
+    // Same for delta_minus. The adjustments if f == 2^p-1 are done later.
+    delta_minus->AssignUInt16(1);
+  }
+}
+
+
+// See comments for InitialScaledStartValues
+static void InitialScaledStartValuesNegativeExponentNegativePower(
+    uint64_t significand, int exponent,
+    int estimated_power, bool need_boundary_deltas,
+    Bignum* numerator, Bignum* denominator,
+    Bignum* delta_minus, Bignum* delta_plus) {
+  // Instead of multiplying the denominator with 10^estimated_power we
+  // multiply all values (numerator and deltas) by 10^-estimated_power.
+
+  // Use numerator as temporary container for power_ten.
+  Bignum* power_ten = numerator;
+  power_ten->AssignPowerUInt16(10, -estimated_power);
+
+  if (need_boundary_deltas) {
+    // Since power_ten == numerator we must make a copy of 10^estimated_power
+    // before we complete the computation of the numerator.
+    // delta_plus = delta_minus = 10^estimated_power
+    delta_plus->AssignBignum(*power_ten);
+    delta_minus->AssignBignum(*power_ten);
+  }
+
+  // numerator = significand * 2 * 10^-estimated_power
+  //  since v = significand * 2^exponent this is equivalent to
+  // numerator = v * 10^-estimated_power * 2 * 2^-exponent.
+  // Remember: numerator has been abused as power_ten. So no need to assign it
+  //  to itself.
+  ASSERT(numerator == power_ten);
+  numerator->MultiplyByUInt64(significand);
+
+  // denominator = 2 * 2^-exponent with exponent < 0.
+  denominator->AssignUInt16(1);
+  denominator->ShiftLeft(-exponent);
+
+  if (need_boundary_deltas) {
+    // Introduce a common denominator so that the deltas to the boundaries are
+    // integers.
+    numerator->ShiftLeft(1);
+    denominator->ShiftLeft(1);
+    // With this shift the boundaries have their correct value, since
+    // delta_plus = 10^-estimated_power, and
+    // delta_minus = 10^-estimated_power.
+    // These assignments have been done earlier.
+    // The adjustments if f == 2^p-1 (lower boundary is closer) are done later.
+  }
+}
+
+
+// Let v = significand * 2^exponent.
+// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator
+// and denominator. The functions GenerateShortestDigits and
+// GenerateCountedDigits will then convert this ratio to its decimal
+// representation d, with the required accuracy.
+// Then d * 10^estimated_power is the representation of v.
+// (Note: the fraction and the estimated_power might get adjusted before
+// generating the decimal representation.)
+//
+// The initial start values consist of:
+//  - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power.
+//  - a scaled (common) denominator.
+//  optionally (used by GenerateShortestDigits to decide if it has the shortest
+//  decimal converting back to v):
+//  - v - m-: the distance to the lower boundary.
+//  - m+ - v: the distance to the upper boundary.
+//
+// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator.
+//
+// Let ep == estimated_power, then the returned values will satisfy:
+//  v / 10^ep = numerator / denominator.
+//  v's boundarys m- and m+:
+//    m- / 10^ep == v / 10^ep - delta_minus / denominator
+//    m+ / 10^ep == v / 10^ep + delta_plus / denominator
+//  Or in other words:
+//    m- == v - delta_minus * 10^ep / denominator;
+//    m+ == v + delta_plus * 10^ep / denominator;
+//
+// Since 10^(k-1) <= v < 10^k    (with k == estimated_power)
+//  or       10^k <= v < 10^(k+1)
+//  we then have 0.1 <= numerator/denominator < 1
+//           or    1 <= numerator/denominator < 10
+//
+// It is then easy to kickstart the digit-generation routine.
+//
+// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST
+// or BIGNUM_DTOA_SHORTEST_SINGLE.
+
+static void InitialScaledStartValues(uint64_t significand,
+                                     int exponent,
+                                     bool lower_boundary_is_closer,
+                                     int estimated_power,
+                                     bool need_boundary_deltas,
+                                     Bignum* numerator,
+                                     Bignum* denominator,
+                                     Bignum* delta_minus,
+                                     Bignum* delta_plus) {
+  if (exponent >= 0) {
+    InitialScaledStartValuesPositiveExponent(
+        significand, exponent, estimated_power, need_boundary_deltas,
+        numerator, denominator, delta_minus, delta_plus);
+  } else if (estimated_power >= 0) {
+    InitialScaledStartValuesNegativeExponentPositivePower(
+        significand, exponent, estimated_power, need_boundary_deltas,
+        numerator, denominator, delta_minus, delta_plus);
+  } else {
+    InitialScaledStartValuesNegativeExponentNegativePower(
+        significand, exponent, estimated_power, need_boundary_deltas,
+        numerator, denominator, delta_minus, delta_plus);
+  }
+
+  if (need_boundary_deltas && lower_boundary_is_closer) {
+    // The lower boundary is closer at half the distance of "normal" numbers.
+    // Increase the common denominator and adapt all but the delta_minus.
+    denominator->ShiftLeft(1);  // *2
+    numerator->ShiftLeft(1);    // *2
+    delta_plus->ShiftLeft(1);   // *2
+  }
+}
+
+
+// This routine multiplies numerator/denominator so that its values lies in the
+// range 1-10. That is after a call to this function we have:
+//    1 <= (numerator + delta_plus) /denominator < 10.
+// Let numerator the input before modification and numerator' the argument
+// after modification, then the output-parameter decimal_point is such that
+//  numerator / denominator * 10^estimated_power ==
+//    numerator' / denominator' * 10^(decimal_point - 1)
+// In some cases estimated_power was too low, and this is already the case. We
+// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k ==
+// estimated_power) but do not touch the numerator or denominator.
+// Otherwise the routine multiplies the numerator and the deltas by 10.
+static void FixupMultiply10(int estimated_power, bool is_even,
+                            int* decimal_point,
+                            Bignum* numerator, Bignum* denominator,
+                            Bignum* delta_minus, Bignum* delta_plus) {
+  bool in_range;
+  if (is_even) {
+    // For IEEE doubles half-way cases (in decimal system numbers ending with 5)
+    // are rounded to the closest floating-point number with even significand.
+    in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0;
+  } else {
+    in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0;
+  }
+  if (in_range) {
+    // Since numerator + delta_plus >= denominator we already have
+    // 1 <= numerator/denominator < 10. Simply update the estimated_power.
+    *decimal_point = estimated_power + 1;
+  } else {
+    *decimal_point = estimated_power;
+    numerator->Times10();
+    if (Bignum::Equal(*delta_minus, *delta_plus)) {
+      delta_minus->Times10();
+      delta_plus->AssignBignum(*delta_minus);
+    } else {
+      delta_minus->Times10();
+      delta_plus->Times10();
+    }
+  }
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/bignum-dtoa.h b/klm/util/double-conversion/bignum-dtoa.h
new file mode 100644
index 00000000..34b96199
--- /dev/null
+++ b/klm/util/double-conversion/bignum-dtoa.h
@@ -0,0 +1,84 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+enum BignumDtoaMode {
+  // Return the shortest correct representation.
+  // For example the output of 0.299999999999999988897 is (the less accurate but
+  // correct) 0.3.
+  BIGNUM_DTOA_SHORTEST,
+  // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats.
+  BIGNUM_DTOA_SHORTEST_SINGLE,
+  // Return a fixed number of digits after the decimal point.
+  // For instance fixed(0.1, 4) becomes 0.1000
+  // If the input number is big, the output will be big.
+  BIGNUM_DTOA_FIXED,
+  // Return a fixed number of digits, no matter what the exponent is.
+  BIGNUM_DTOA_PRECISION
+};
+
+// Converts the given double 'v' to ascii.
+// The result should be interpreted as buffer * 10^(point-length).
+// The buffer will be null-terminated.
+//
+// The input v must be > 0 and different from NaN, and Infinity.
+//
+// The output depends on the given mode:
+//  - SHORTEST: produce the least amount of digits for which the internal
+//   identity requirement is still satisfied. If the digits are printed
+//   (together with the correct exponent) then reading this number will give
+//   'v' again. The buffer will choose the representation that is closest to
+//   'v'. If there are two at the same distance, than the number is round up.
+//   In this mode the 'requested_digits' parameter is ignored.
+//  - FIXED: produces digits necessary to print a given number with
+//   'requested_digits' digits after the decimal point. The produced digits
+//   might be too short in which case the caller has to fill the gaps with '0's.
+//   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
+//   Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns
+//     buffer="2", point=0.
+//   Note: the length of the returned buffer has no meaning wrt the significance
+//   of its digits. That is, just because it contains '0's does not mean that
+//   any other digit would not satisfy the internal identity requirement.
+//  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
+//   Even though the length of produced digits usually equals
+//   'requested_digits', the function is allowed to return fewer digits, in
+//   which case the caller has to fill the missing digits with '0's.
+//   Halfway cases are again rounded up.
+// 'BignumDtoa' expects the given buffer to be big enough to hold all digits
+// and a terminating null-character.
+void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits,
+                Vector<char> buffer, int* length, int* point);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_BIGNUM_DTOA_H_
diff --git a/klm/util/double-conversion/bignum.cc b/klm/util/double-conversion/bignum.cc
new file mode 100644
index 00000000..747491a0
--- /dev/null
+++ b/klm/util/double-conversion/bignum.cc
@@ -0,0 +1,764 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "bignum.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+Bignum::Bignum()
+    : bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) {
+  for (int i = 0; i < kBigitCapacity; ++i) {
+    bigits_[i] = 0;
+  }
+}
+
+
+template<typename S>
+static int BitSize(S value) {
+  return 8 * sizeof(value);
+}
+
+// Guaranteed to lie in one Bigit.
+void Bignum::AssignUInt16(uint16_t value) {
+  ASSERT(kBigitSize >= BitSize(value));
+  Zero();
+  if (value == 0) return;
+
+  EnsureCapacity(1);
+  bigits_[0] = value;
+  used_digits_ = 1;
+}
+
+
+void Bignum::AssignUInt64(uint64_t value) {
+  const int kUInt64Size = 64;
+
+  Zero();
+  if (value == 0) return;
+
+  int needed_bigits = kUInt64Size / kBigitSize + 1;
+  EnsureCapacity(needed_bigits);
+  for (int i = 0; i < needed_bigits; ++i) {
+    bigits_[i] = value & kBigitMask;
+    value = value >> kBigitSize;
+  }
+  used_digits_ = needed_bigits;
+  Clamp();
+}
+
+
+void Bignum::AssignBignum(const Bignum& other) {
+  exponent_ = other.exponent_;
+  for (int i = 0; i < other.used_digits_; ++i) {
+    bigits_[i] = other.bigits_[i];
+  }
+  // Clear the excess digits (if there were any).
+  for (int i = other.used_digits_; i < used_digits_; ++i) {
+    bigits_[i] = 0;
+  }
+  used_digits_ = other.used_digits_;
+}
+
+
+static uint64_t ReadUInt64(Vector<const char> buffer,
+                           int from,
+                           int digits_to_read) {
+  uint64_t result = 0;
+  for (int i = from; i < from + digits_to_read; ++i) {
+    int digit = buffer[i] - '0';
+    ASSERT(0 <= digit && digit <= 9);
+    result = result * 10 + digit;
+  }
+  return result;
+}
+
+
+void Bignum::AssignDecimalString(Vector<const char> value) {
+  // 2^64 = 18446744073709551616 > 10^19
+  const int kMaxUint64DecimalDigits = 19;
+  Zero();
+  int length = value.length();
+  int pos = 0;
+  // Let's just say that each digit needs 4 bits.
+  while (length >= kMaxUint64DecimalDigits) {
+    uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits);
+    pos += kMaxUint64DecimalDigits;
+    length -= kMaxUint64DecimalDigits;
+    MultiplyByPowerOfTen(kMaxUint64DecimalDigits);
+    AddUInt64(digits);
+  }
+  uint64_t digits = ReadUInt64(value, pos, length);
+  MultiplyByPowerOfTen(length);
+  AddUInt64(digits);
+  Clamp();
+}
+
+
+static int HexCharValue(char c) {
+  if ('0' <= c && c <= '9') return c - '0';
+  if ('a' <= c && c <= 'f') return 10 + c - 'a';
+  if ('A' <= c && c <= 'F') return 10 + c - 'A';
+  UNREACHABLE();
+  return 0;  // To make compiler happy.
+}
+
+
+void Bignum::AssignHexString(Vector<const char> value) {
+  Zero();
+  int length = value.length();
+
+  int needed_bigits = length * 4 / kBigitSize + 1;
+  EnsureCapacity(needed_bigits);
+  int string_index = length - 1;
+  for (int i = 0; i < needed_bigits - 1; ++i) {
+    // These bigits are guaranteed to be "full".
+    Chunk current_bigit = 0;
+    for (int j = 0; j < kBigitSize / 4; j++) {
+      current_bigit += HexCharValue(value[string_index--]) << (j * 4);
+    }
+    bigits_[i] = current_bigit;
+  }
+  used_digits_ = needed_bigits - 1;
+
+  Chunk most_significant_bigit = 0;  // Could be = 0;
+  for (int j = 0; j <= string_index; ++j) {
+    most_significant_bigit <<= 4;
+    most_significant_bigit += HexCharValue(value[j]);
+  }
+  if (most_significant_bigit != 0) {
+    bigits_[used_digits_] = most_significant_bigit;
+    used_digits_++;
+  }
+  Clamp();
+}
+
+
+void Bignum::AddUInt64(uint64_t operand) {
+  if (operand == 0) return;
+  Bignum other;
+  other.AssignUInt64(operand);
+  AddBignum(other);
+}
+
+
+void Bignum::AddBignum(const Bignum& other) {
+  ASSERT(IsClamped());
+  ASSERT(other.IsClamped());
+
+  // If this has a greater exponent than other append zero-bigits to this.
+  // After this call exponent_ <= other.exponent_.
+  Align(other);
+
+  // There are two possibilities:
+  //   aaaaaaaaaaa 0000  (where the 0s represent a's exponent)
+  //     bbbbb 00000000
+  //   ----------------
+  //   ccccccccccc 0000
+  // or
+  //    aaaaaaaaaa 0000
+  //  bbbbbbbbb 0000000
+  //  -----------------
+  //  cccccccccccc 0000
+  // In both cases we might need a carry bigit.
+
+  EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_);
+  Chunk carry = 0;
+  int bigit_pos = other.exponent_ - exponent_;
+  ASSERT(bigit_pos >= 0);
+  for (int i = 0; i < other.used_digits_; ++i) {
+    Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry;
+    bigits_[bigit_pos] = sum & kBigitMask;
+    carry = sum >> kBigitSize;
+    bigit_pos++;
+  }
+
+  while (carry != 0) {
+    Chunk sum = bigits_[bigit_pos] + carry;
+    bigits_[bigit_pos] = sum & kBigitMask;
+    carry = sum >> kBigitSize;
+    bigit_pos++;
+  }
+  used_digits_ = Max(bigit_pos, used_digits_);
+  ASSERT(IsClamped());
+}
+
+
+void Bignum::SubtractBignum(const Bignum& other) {
+  ASSERT(IsClamped());
+  ASSERT(other.IsClamped());
+  // We require this to be bigger than other.
+  ASSERT(LessEqual(other, *this));
+
+  Align(other);
+
+  int offset = other.exponent_ - exponent_;
+  Chunk borrow = 0;
+  int i;
+  for (i = 0; i < other.used_digits_; ++i) {
+    ASSERT((borrow == 0) || (borrow == 1));
+    Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow;
+    bigits_[i + offset] = difference & kBigitMask;
+    borrow = difference >> (kChunkSize - 1);
+  }
+  while (borrow != 0) {
+    Chunk difference = bigits_[i + offset] - borrow;
+    bigits_[i + offset] = difference & kBigitMask;
+    borrow = difference >> (kChunkSize - 1);
+    ++i;
+  }
+  Clamp();
+}
+
+
+void Bignum::ShiftLeft(int shift_amount) {
+  if (used_digits_ == 0) return;
+  exponent_ += shift_amount / kBigitSize;
+  int local_shift = shift_amount % kBigitSize;
+  EnsureCapacity(used_digits_ + 1);
+  BigitsShiftLeft(local_shift);
+}
+
+
+void Bignum::MultiplyByUInt32(uint32_t factor) {
+  if (factor == 1) return;
+  if (factor == 0) {
+    Zero();
+    return;
+  }
+  if (used_digits_ == 0) return;
+
+  // The product of a bigit with the factor is of size kBigitSize + 32.
+  // Assert that this number + 1 (for the carry) fits into double chunk.
+  ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1);
+  DoubleChunk carry = 0;
+  for (int i = 0; i < used_digits_; ++i) {
+    DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry;
+    bigits_[i] = static_cast<Chunk>(product & kBigitMask);
+    carry = (product >> kBigitSize);
+  }
+  while (carry != 0) {
+    EnsureCapacity(used_digits_ + 1);
+    bigits_[used_digits_] = carry & kBigitMask;
+    used_digits_++;
+    carry >>= kBigitSize;
+  }
+}
+
+
+void Bignum::MultiplyByUInt64(uint64_t factor) {
+  if (factor == 1) return;
+  if (factor == 0) {
+    Zero();
+    return;
+  }
+  ASSERT(kBigitSize < 32);
+  uint64_t carry = 0;
+  uint64_t low = factor & 0xFFFFFFFF;
+  uint64_t high = factor >> 32;
+  for (int i = 0; i < used_digits_; ++i) {
+    uint64_t product_low = low * bigits_[i];
+    uint64_t product_high = high * bigits_[i];
+    uint64_t tmp = (carry & kBigitMask) + product_low;
+    bigits_[i] = tmp & kBigitMask;
+    carry = (carry >> kBigitSize) + (tmp >> kBigitSize) +
+        (product_high << (32 - kBigitSize));
+  }
+  while (carry != 0) {
+    EnsureCapacity(used_digits_ + 1);
+    bigits_[used_digits_] = carry & kBigitMask;
+    used_digits_++;
+    carry >>= kBigitSize;
+  }
+}
+
+
+void Bignum::MultiplyByPowerOfTen(int exponent) {
+  const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d);
+  const uint16_t kFive1 = 5;
+  const uint16_t kFive2 = kFive1 * 5;
+  const uint16_t kFive3 = kFive2 * 5;
+  const uint16_t kFive4 = kFive3 * 5;
+  const uint16_t kFive5 = kFive4 * 5;
+  const uint16_t kFive6 = kFive5 * 5;
+  const uint32_t kFive7 = kFive6 * 5;
+  const uint32_t kFive8 = kFive7 * 5;
+  const uint32_t kFive9 = kFive8 * 5;
+  const uint32_t kFive10 = kFive9 * 5;
+  const uint32_t kFive11 = kFive10 * 5;
+  const uint32_t kFive12 = kFive11 * 5;
+  const uint32_t kFive13 = kFive12 * 5;
+  const uint32_t kFive1_to_12[] =
+      { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6,
+        kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 };
+
+  ASSERT(exponent >= 0);
+  if (exponent == 0) return;
+  if (used_digits_ == 0) return;
+
+  // We shift by exponent at the end just before returning.
+  int remaining_exponent = exponent;
+  while (remaining_exponent >= 27) {
+    MultiplyByUInt64(kFive27);
+    remaining_exponent -= 27;
+  }
+  while (remaining_exponent >= 13) {
+    MultiplyByUInt32(kFive13);
+    remaining_exponent -= 13;
+  }
+  if (remaining_exponent > 0) {
+    MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]);
+  }
+  ShiftLeft(exponent);
+}
+
+
+void Bignum::Square() {
+  ASSERT(IsClamped());
+  int product_length = 2 * used_digits_;
+  EnsureCapacity(product_length);
+
+  // Comba multiplication: compute each column separately.
+  // Example: r = a2a1a0 * b2b1b0.
+  //    r =  1    * a0b0 +
+  //        10    * (a1b0 + a0b1) +
+  //        100   * (a2b0 + a1b1 + a0b2) +
+  //        1000  * (a2b1 + a1b2) +
+  //        10000 * a2b2
+  //
+  // In the worst case we have to accumulate nb-digits products of digit*digit.
+  //
+  // Assert that the additional number of bits in a DoubleChunk are enough to
+  // sum up used_digits of Bigit*Bigit.
+  if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) {
+    UNIMPLEMENTED();
+  }
+  DoubleChunk accumulator = 0;
+  // First shift the digits so we don't overwrite them.
+  int copy_offset = used_digits_;
+  for (int i = 0; i < used_digits_; ++i) {
+    bigits_[copy_offset + i] = bigits_[i];
+  }
+  // We have two loops to avoid some 'if's in the loop.
+  for (int i = 0; i < used_digits_; ++i) {
+    // Process temporary digit i with power i.
+    // The sum of the two indices must be equal to i.
+    int bigit_index1 = i;
+    int bigit_index2 = 0;
+    // Sum all of the sub-products.
+    while (bigit_index1 >= 0) {
+      Chunk chunk1 = bigits_[copy_offset + bigit_index1];
+      Chunk chunk2 = bigits_[copy_offset + bigit_index2];
+      accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
+      bigit_index1--;
+      bigit_index2++;
+    }
+    bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
+    accumulator >>= kBigitSize;
+  }
+  for (int i = used_digits_; i < product_length; ++i) {
+    int bigit_index1 = used_digits_ - 1;
+    int bigit_index2 = i - bigit_index1;
+    // Invariant: sum of both indices is again equal to i.
+    // Inner loop runs 0 times on last iteration, emptying accumulator.
+    while (bigit_index2 < used_digits_) {
+      Chunk chunk1 = bigits_[copy_offset + bigit_index1];
+      Chunk chunk2 = bigits_[copy_offset + bigit_index2];
+      accumulator += static_cast<DoubleChunk>(chunk1) * chunk2;
+      bigit_index1--;
+      bigit_index2++;
+    }
+    // The overwritten bigits_[i] will never be read in further loop iterations,
+    // because bigit_index1 and bigit_index2 are always greater
+    // than i - used_digits_.
+    bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask;
+    accumulator >>= kBigitSize;
+  }
+  // Since the result was guaranteed to lie inside the number the
+  // accumulator must be 0 now.
+  ASSERT(accumulator == 0);
+
+  // Don't forget to update the used_digits and the exponent.
+  used_digits_ = product_length;
+  exponent_ *= 2;
+  Clamp();
+}
+
+
+void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) {
+  ASSERT(base != 0);
+  ASSERT(power_exponent >= 0);
+  if (power_exponent == 0) {
+    AssignUInt16(1);
+    return;
+  }
+  Zero();
+  int shifts = 0;
+  // We expect base to be in range 2-32, and most often to be 10.
+  // It does not make much sense to implement different algorithms for counting
+  // the bits.
+  while ((base & 1) == 0) {
+    base >>= 1;
+    shifts++;
+  }
+  int bit_size = 0;
+  int tmp_base = base;
+  while (tmp_base != 0) {
+    tmp_base >>= 1;
+    bit_size++;
+  }
+  int final_size = bit_size * power_exponent;
+  // 1 extra bigit for the shifting, and one for rounded final_size.
+  EnsureCapacity(final_size / kBigitSize + 2);
+
+  // Left to Right exponentiation.
+  int mask = 1;
+  while (power_exponent >= mask) mask <<= 1;
+
+  // The mask is now pointing to the bit above the most significant 1-bit of
+  // power_exponent.
+  // Get rid of first 1-bit;
+  mask >>= 2;
+  uint64_t this_value = base;
+
+  bool delayed_multipliciation = false;
+  const uint64_t max_32bits = 0xFFFFFFFF;
+  while (mask != 0 && this_value <= max_32bits) {
+    this_value = this_value * this_value;
+    // Verify that there is enough space in this_value to perform the
+    // multiplication.  The first bit_size bits must be 0.
+    if ((power_exponent & mask) != 0) {
+      uint64_t base_bits_mask =
+          ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1);
+      bool high_bits_zero = (this_value & base_bits_mask) == 0;
+      if (high_bits_zero) {
+        this_value *= base;
+      } else {
+        delayed_multipliciation = true;
+      }
+    }
+    mask >>= 1;
+  }
+  AssignUInt64(this_value);
+  if (delayed_multipliciation) {
+    MultiplyByUInt32(base);
+  }
+
+  // Now do the same thing as a bignum.
+  while (mask != 0) {
+    Square();
+    if ((power_exponent & mask) != 0) {
+      MultiplyByUInt32(base);
+    }
+    mask >>= 1;
+  }
+
+  // And finally add the saved shifts.
+  ShiftLeft(shifts * power_exponent);
+}
+
+
+// Precondition: this/other < 16bit.
+uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) {
+  ASSERT(IsClamped());
+  ASSERT(other.IsClamped());
+  ASSERT(other.used_digits_ > 0);
+
+  // Easy case: if we have less digits than the divisor than the result is 0.
+  // Note: this handles the case where this == 0, too.
+  if (BigitLength() < other.BigitLength()) {
+    return 0;
+  }
+
+  Align(other);
+
+  uint16_t result = 0;
+
+  // Start by removing multiples of 'other' until both numbers have the same
+  // number of digits.
+  while (BigitLength() > other.BigitLength()) {
+    // This naive approach is extremely inefficient if the this divided other
+    // might be big. This function is implemented for doubleToString where
+    // the result should be small (less than 10).
+    ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16));
+    // Remove the multiples of the first digit.
+    // Example this = 23 and other equals 9. -> Remove 2 multiples.
+    result += bigits_[used_digits_ - 1];
+    SubtractTimes(other, bigits_[used_digits_ - 1]);
+  }
+
+  ASSERT(BigitLength() == other.BigitLength());
+
+  // Both bignums are at the same length now.
+  // Since other has more than 0 digits we know that the access to
+  // bigits_[used_digits_ - 1] is safe.
+  Chunk this_bigit = bigits_[used_digits_ - 1];
+  Chunk other_bigit = other.bigits_[other.used_digits_ - 1];
+
+  if (other.used_digits_ == 1) {
+    // Shortcut for easy (and common) case.
+    int quotient = this_bigit / other_bigit;
+    bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient;
+    result += quotient;
+    Clamp();
+    return result;
+  }
+
+  int division_estimate = this_bigit / (other_bigit + 1);
+  result += division_estimate;
+  SubtractTimes(other, division_estimate);
+
+  if (other_bigit * (division_estimate + 1) > this_bigit) {
+    // No need to even try to subtract. Even if other's remaining digits were 0
+    // another subtraction would be too much.
+    return result;
+  }
+
+  while (LessEqual(other, *this)) {
+    SubtractBignum(other);
+    result++;
+  }
+  return result;
+}
+
+
+template<typename S>
+static int SizeInHexChars(S number) {
+  ASSERT(number > 0);
+  int result = 0;
+  while (number != 0) {
+    number >>= 4;
+    result++;
+  }
+  return result;
+}
+
+
+static char HexCharOfValue(int value) {
+  ASSERT(0 <= value && value <= 16);
+  if (value < 10) return value + '0';
+  return value - 10 + 'A';
+}
+
+
+bool Bignum::ToHexString(char* buffer, int buffer_size) const {
+  ASSERT(IsClamped());
+  // Each bigit must be printable as separate hex-character.
+  ASSERT(kBigitSize % 4 == 0);
+  const int kHexCharsPerBigit = kBigitSize / 4;
+
+  if (used_digits_ == 0) {
+    if (buffer_size < 2) return false;
+    buffer[0] = '0';
+    buffer[1] = '\0';
+    return true;
+  }
+  // We add 1 for the terminating '\0' character.
+  int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit +
+      SizeInHexChars(bigits_[used_digits_ - 1]) + 1;
+  if (needed_chars > buffer_size) return false;
+  int string_index = needed_chars - 1;
+  buffer[string_index--] = '\0';
+  for (int i = 0; i < exponent_; ++i) {
+    for (int j = 0; j < kHexCharsPerBigit; ++j) {
+      buffer[string_index--] = '0';
+    }
+  }
+  for (int i = 0; i < used_digits_ - 1; ++i) {
+    Chunk current_bigit = bigits_[i];
+    for (int j = 0; j < kHexCharsPerBigit; ++j) {
+      buffer[string_index--] = HexCharOfValue(current_bigit & 0xF);
+      current_bigit >>= 4;
+    }
+  }
+  // And finally the last bigit.
+  Chunk most_significant_bigit = bigits_[used_digits_ - 1];
+  while (most_significant_bigit != 0) {
+    buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF);
+    most_significant_bigit >>= 4;
+  }
+  return true;
+}
+
+
+Bignum::Chunk Bignum::BigitAt(int index) const {
+  if (index >= BigitLength()) return 0;
+  if (index < exponent_) return 0;
+  return bigits_[index - exponent_];
+}
+
+
+int Bignum::Compare(const Bignum& a, const Bignum& b) {
+  ASSERT(a.IsClamped());
+  ASSERT(b.IsClamped());
+  int bigit_length_a = a.BigitLength();
+  int bigit_length_b = b.BigitLength();
+  if (bigit_length_a < bigit_length_b) return -1;
+  if (bigit_length_a > bigit_length_b) return +1;
+  for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) {
+    Chunk bigit_a = a.BigitAt(i);
+    Chunk bigit_b = b.BigitAt(i);
+    if (bigit_a < bigit_b) return -1;
+    if (bigit_a > bigit_b) return +1;
+    // Otherwise they are equal up to this digit. Try the next digit.
+  }
+  return 0;
+}
+
+
+int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) {
+  ASSERT(a.IsClamped());
+  ASSERT(b.IsClamped());
+  ASSERT(c.IsClamped());
+  if (a.BigitLength() < b.BigitLength()) {
+    return PlusCompare(b, a, c);
+  }
+  if (a.BigitLength() + 1 < c.BigitLength()) return -1;
+  if (a.BigitLength() > c.BigitLength()) return +1;
+  // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than
+  // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one
+  // of 'a'.
+  if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) {
+    return -1;
+  }
+
+  Chunk borrow = 0;
+  // Starting at min_exponent all digits are == 0. So no need to compare them.
+  int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_);
+  for (int i = c.BigitLength() - 1; i >= min_exponent; --i) {
+    Chunk chunk_a = a.BigitAt(i);
+    Chunk chunk_b = b.BigitAt(i);
+    Chunk chunk_c = c.BigitAt(i);
+    Chunk sum = chunk_a + chunk_b;
+    if (sum > chunk_c + borrow) {
+      return +1;
+    } else {
+      borrow = chunk_c + borrow - sum;
+      if (borrow > 1) return -1;
+      borrow <<= kBigitSize;
+    }
+  }
+  if (borrow == 0) return 0;
+  return -1;
+}
+
+
+void Bignum::Clamp() {
+  while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) {
+    used_digits_--;
+  }
+  if (used_digits_ == 0) {
+    // Zero.
+    exponent_ = 0;
+  }
+}
+
+
+bool Bignum::IsClamped() const {
+  return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0;
+}
+
+
+void Bignum::Zero() {
+  for (int i = 0; i < used_digits_; ++i) {
+    bigits_[i] = 0;
+  }
+  used_digits_ = 0;
+  exponent_ = 0;
+}
+
+
+void Bignum::Align(const Bignum& other) {
+  if (exponent_ > other.exponent_) {
+    // If "X" represents a "hidden" digit (by the exponent) then we are in the
+    // following case (a == this, b == other):
+    // a:  aaaaaaXXXX   or a:   aaaaaXXX
+    // b:     bbbbbbX      b: bbbbbbbbXX
+    // We replace some of the hidden digits (X) of a with 0 digits.
+    // a:  aaaaaa000X   or a:   aaaaa0XX
+    int zero_digits = exponent_ - other.exponent_;
+    EnsureCapacity(used_digits_ + zero_digits);
+    for (int i = used_digits_ - 1; i >= 0; --i) {
+      bigits_[i + zero_digits] = bigits_[i];
+    }
+    for (int i = 0; i < zero_digits; ++i) {
+      bigits_[i] = 0;
+    }
+    used_digits_ += zero_digits;
+    exponent_ -= zero_digits;
+    ASSERT(used_digits_ >= 0);
+    ASSERT(exponent_ >= 0);
+  }
+}
+
+
+void Bignum::BigitsShiftLeft(int shift_amount) {
+  ASSERT(shift_amount < kBigitSize);
+  ASSERT(shift_amount >= 0);
+  Chunk carry = 0;
+  for (int i = 0; i < used_digits_; ++i) {
+    Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount);
+    bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask;
+    carry = new_carry;
+  }
+  if (carry != 0) {
+    bigits_[used_digits_] = carry;
+    used_digits_++;
+  }
+}
+
+
+void Bignum::SubtractTimes(const Bignum& other, int factor) {
+  ASSERT(exponent_ <= other.exponent_);
+  if (factor < 3) {
+    for (int i = 0; i < factor; ++i) {
+      SubtractBignum(other);
+    }
+    return;
+  }
+  Chunk borrow = 0;
+  int exponent_diff = other.exponent_ - exponent_;
+  for (int i = 0; i < other.used_digits_; ++i) {
+    DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i];
+    DoubleChunk remove = borrow + product;
+    Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask);
+    bigits_[i + exponent_diff] = difference & kBigitMask;
+    borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) +
+                                (remove >> kBigitSize));
+  }
+  for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) {
+    if (borrow == 0) return;
+    Chunk difference = bigits_[i] - borrow;
+    bigits_[i] = difference & kBigitMask;
+    borrow = difference >> (kChunkSize - 1);
+    ++i;
+  }
+  Clamp();
+}
+
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/bignum.h b/klm/util/double-conversion/bignum.h
new file mode 100644
index 00000000..5ec3544f
--- /dev/null
+++ b/klm/util/double-conversion/bignum.h
@@ -0,0 +1,145 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_BIGNUM_H_
+#define DOUBLE_CONVERSION_BIGNUM_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+class Bignum {
+ public:
+  // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
+  // This bignum can encode much bigger numbers, since it contains an
+  // exponent.
+  static const int kMaxSignificantBits = 3584;
+
+  Bignum();
+  void AssignUInt16(uint16_t value);
+  void AssignUInt64(uint64_t value);
+  void AssignBignum(const Bignum& other);
+
+  void AssignDecimalString(Vector<const char> value);
+  void AssignHexString(Vector<const char> value);
+
+  void AssignPowerUInt16(uint16_t base, int exponent);
+
+  void AddUInt16(uint16_t operand);
+  void AddUInt64(uint64_t operand);
+  void AddBignum(const Bignum& other);
+  // Precondition: this >= other.
+  void SubtractBignum(const Bignum& other);
+
+  void Square();
+  void ShiftLeft(int shift_amount);
+  void MultiplyByUInt32(uint32_t factor);
+  void MultiplyByUInt64(uint64_t factor);
+  void MultiplyByPowerOfTen(int exponent);
+  void Times10() { return MultiplyByUInt32(10); }
+  // Pseudocode:
+  //  int result = this / other;
+  //  this = this % other;
+  // In the worst case this function is in O(this/other).
+  uint16_t DivideModuloIntBignum(const Bignum& other);
+
+  bool ToHexString(char* buffer, int buffer_size) const;
+
+  // Returns
+  //  -1 if a < b,
+  //   0 if a == b, and
+  //  +1 if a > b.
+  static int Compare(const Bignum& a, const Bignum& b);
+  static bool Equal(const Bignum& a, const Bignum& b) {
+    return Compare(a, b) == 0;
+  }
+  static bool LessEqual(const Bignum& a, const Bignum& b) {
+    return Compare(a, b) <= 0;
+  }
+  static bool Less(const Bignum& a, const Bignum& b) {
+    return Compare(a, b) < 0;
+  }
+  // Returns Compare(a + b, c);
+  static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c);
+  // Returns a + b == c
+  static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
+    return PlusCompare(a, b, c) == 0;
+  }
+  // Returns a + b <= c
+  static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
+    return PlusCompare(a, b, c) <= 0;
+  }
+  // Returns a + b < c
+  static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) {
+    return PlusCompare(a, b, c) < 0;
+  }
+ private:
+  typedef uint32_t Chunk;
+  typedef uint64_t DoubleChunk;
+
+  static const int kChunkSize = sizeof(Chunk) * 8;
+  static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
+  // With bigit size of 28 we loose some bits, but a double still fits easily
+  // into two chunks, and more importantly we can use the Comba multiplication.
+  static const int kBigitSize = 28;
+  static const Chunk kBigitMask = (1 << kBigitSize) - 1;
+  // Every instance allocates kBigitLength chunks on the stack. Bignums cannot
+  // grow. There are no checks if the stack-allocated space is sufficient.
+  static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;
+
+  void EnsureCapacity(int size) {
+    if (size > kBigitCapacity) {
+      UNREACHABLE();
+    }
+  }
+  void Align(const Bignum& other);
+  void Clamp();
+  bool IsClamped() const;
+  void Zero();
+  // Requires this to have enough capacity (no tests done).
+  // Updates used_digits_ if necessary.
+  // shift_amount must be < kBigitSize.
+  void BigitsShiftLeft(int shift_amount);
+  // BigitLength includes the "hidden" digits encoded in the exponent.
+  int BigitLength() const { return used_digits_ + exponent_; }
+  Chunk BigitAt(int index) const;
+  void SubtractTimes(const Bignum& other, int factor);
+
+  Chunk bigits_buffer_[kBigitCapacity];
+  // A vector backed by bigits_buffer_. This way accesses to the array are
+  // checked for out-of-bounds errors.
+  Vector<Chunk> bigits_;
+  int used_digits_;
+  // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
+  int exponent_;
+
+  DISALLOW_COPY_AND_ASSIGN(Bignum);
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_BIGNUM_H_
diff --git a/klm/util/double-conversion/cached-powers.cc b/klm/util/double-conversion/cached-powers.cc
new file mode 100644
index 00000000..c6764291
--- /dev/null
+++ b/klm/util/double-conversion/cached-powers.cc
@@ -0,0 +1,175 @@
+// Copyright 2006-2008 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdarg.h>
+#include <limits.h>
+#include <math.h>
+
+#include "utils.h"
+
+#include "cached-powers.h"
+
+namespace double_conversion {
+
+struct CachedPower {
+  uint64_t significand;
+  int16_t binary_exponent;
+  int16_t decimal_exponent;
+};
+
+static const CachedPower kCachedPowers[] = {
+  {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
+  {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
+  {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
+  {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
+  {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
+  {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
+  {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
+  {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
+  {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
+  {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
+  {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
+  {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
+  {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
+  {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
+  {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
+  {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
+  {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
+  {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
+  {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
+  {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
+  {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
+  {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
+  {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
+  {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
+  {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
+  {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
+  {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
+  {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
+  {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
+  {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
+  {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
+  {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
+  {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
+  {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
+  {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
+  {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
+  {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
+  {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
+  {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
+  {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
+  {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
+  {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
+  {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
+  {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
+  {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
+  {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
+  {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
+  {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
+  {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
+  {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
+  {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
+  {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
+  {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
+  {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
+  {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
+  {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
+  {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
+  {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
+  {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
+  {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
+  {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
+  {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
+  {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
+  {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
+  {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
+  {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
+  {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
+  {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
+  {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
+  {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
+  {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
+  {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
+  {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
+  {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
+  {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
+  {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
+  {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
+  {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
+  {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
+  {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
+  {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
+  {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
+  {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
+  {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
+  {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
+  {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
+  {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
+};
+
+static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
+static const int kCachedPowersOffset = 348;  // -1 * the first decimal_exponent.
+static const double kD_1_LOG2_10 = 0.30102999566398114;  //  1 / lg(10)
+// Difference between the decimal exponents in the table above.
+const int PowersOfTenCache::kDecimalExponentDistance = 8;
+const int PowersOfTenCache::kMinDecimalExponent = -348;
+const int PowersOfTenCache::kMaxDecimalExponent = 340;
+
+void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+    int min_exponent,
+    int max_exponent,
+    DiyFp* power,
+    int* decimal_exponent) {
+  int kQ = DiyFp::kSignificandSize;
+  double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
+  int foo = kCachedPowersOffset;
+  int index =
+      (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
+  ASSERT(0 <= index && index < kCachedPowersLength);
+  CachedPower cached_power = kCachedPowers[index];
+  ASSERT(min_exponent <= cached_power.binary_exponent);
+  ASSERT(cached_power.binary_exponent <= max_exponent);
+  *decimal_exponent = cached_power.decimal_exponent;
+  *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+}
+
+
+void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
+                                                        DiyFp* power,
+                                                        int* found_exponent) {
+  ASSERT(kMinDecimalExponent <= requested_exponent);
+  ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
+  int index =
+      (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
+  CachedPower cached_power = kCachedPowers[index];
+  *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
+  *found_exponent = cached_power.decimal_exponent;
+  ASSERT(*found_exponent <= requested_exponent);
+  ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/cached-powers.h b/klm/util/double-conversion/cached-powers.h
new file mode 100644
index 00000000..61a50614
--- /dev/null
+++ b/klm/util/double-conversion/cached-powers.h
@@ -0,0 +1,64 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_
+#define DOUBLE_CONVERSION_CACHED_POWERS_H_
+
+#include "diy-fp.h"
+
+namespace double_conversion {
+
+class PowersOfTenCache {
+ public:
+
+  // Not all powers of ten are cached. The decimal exponent of two neighboring
+  // cached numbers will differ by kDecimalExponentDistance.
+  static const int kDecimalExponentDistance;
+
+  static const int kMinDecimalExponent;
+  static const int kMaxDecimalExponent;
+
+  // Returns a cached power-of-ten with a binary exponent in the range
+  // [min_exponent; max_exponent] (boundaries included).
+  static void GetCachedPowerForBinaryExponentRange(int min_exponent,
+                                                   int max_exponent,
+                                                   DiyFp* power,
+                                                   int* decimal_exponent);
+
+  // Returns a cached power of ten x ~= 10^k such that
+  //   k <= decimal_exponent < k + kCachedPowersDecimalDistance.
+  // The given decimal_exponent must satisfy
+  //   kMinDecimalExponent <= requested_exponent, and
+  //   requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance.
+  static void GetCachedPowerForDecimalExponent(int requested_exponent,
+                                               DiyFp* power,
+                                               int* found_exponent);
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_CACHED_POWERS_H_
diff --git a/klm/util/double-conversion/diy-fp.cc b/klm/util/double-conversion/diy-fp.cc
new file mode 100644
index 00000000..ddd1891b
--- /dev/null
+++ b/klm/util/double-conversion/diy-fp.cc
@@ -0,0 +1,57 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+#include "diy-fp.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+void DiyFp::Multiply(const DiyFp& other) {
+  // Simply "emulates" a 128 bit multiplication.
+  // However: the resulting number only contains 64 bits. The least
+  // significant 64 bits are only used for rounding the most significant 64
+  // bits.
+  const uint64_t kM32 = 0xFFFFFFFFU;
+  uint64_t a = f_ >> 32;
+  uint64_t b = f_ & kM32;
+  uint64_t c = other.f_ >> 32;
+  uint64_t d = other.f_ & kM32;
+  uint64_t ac = a * c;
+  uint64_t bc = b * c;
+  uint64_t ad = a * d;
+  uint64_t bd = b * d;
+  uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
+  // By adding 1U << 31 to tmp we round the final result.
+  // Halfway cases will be round up.
+  tmp += 1U << 31;
+  uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
+  e_ += other.e_ + 64;
+  f_ = result_f;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/diy-fp.h b/klm/util/double-conversion/diy-fp.h
new file mode 100644
index 00000000..9dcf8fbd
--- /dev/null
+++ b/klm/util/double-conversion/diy-fp.h
@@ -0,0 +1,118 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DIY_FP_H_
+#define DOUBLE_CONVERSION_DIY_FP_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// This "Do It Yourself Floating Point" class implements a floating-point number
+// with a uint64 significand and an int exponent. Normalized DiyFp numbers will
+// have the most significant bit of the significand set.
+// Multiplication and Subtraction do not normalize their results.
+// DiyFp are not designed to contain special doubles (NaN and Infinity).
+class DiyFp {
+ public:
+  static const int kSignificandSize = 64;
+
+  DiyFp() : f_(0), e_(0) {}
+  DiyFp(uint64_t f, int e) : f_(f), e_(e) {}
+
+  // this = this - other.
+  // The exponents of both numbers must be the same and the significand of this
+  // must be bigger than the significand of other.
+  // The result will not be normalized.
+  void Subtract(const DiyFp& other) {
+    ASSERT(e_ == other.e_);
+    ASSERT(f_ >= other.f_);
+    f_ -= other.f_;
+  }
+
+  // Returns a - b.
+  // The exponents of both numbers must be the same and this must be bigger
+  // than other. The result will not be normalized.
+  static DiyFp Minus(const DiyFp& a, const DiyFp& b) {
+    DiyFp result = a;
+    result.Subtract(b);
+    return result;
+  }
+
+
+  // this = this * other.
+  void Multiply(const DiyFp& other);
+
+  // returns a * b;
+  static DiyFp Times(const DiyFp& a, const DiyFp& b) {
+    DiyFp result = a;
+    result.Multiply(b);
+    return result;
+  }
+
+  void Normalize() {
+    ASSERT(f_ != 0);
+    uint64_t f = f_;
+    int e = e_;
+
+    // This method is mainly called for normalizing boundaries. In general
+    // boundaries need to be shifted by 10 bits. We thus optimize for this case.
+    const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
+    while ((f & k10MSBits) == 0) {
+      f <<= 10;
+      e -= 10;
+    }
+    while ((f & kUint64MSB) == 0) {
+      f <<= 1;
+      e--;
+    }
+    f_ = f;
+    e_ = e;
+  }
+
+  static DiyFp Normalize(const DiyFp& a) {
+    DiyFp result = a;
+    result.Normalize();
+    return result;
+  }
+
+  uint64_t f() const { return f_; }
+  int e() const { return e_; }
+
+  void set_f(uint64_t new_value) { f_ = new_value; }
+  void set_e(int new_value) { e_ = new_value; }
+
+ private:
+  static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000);
+
+  uint64_t f_;
+  int e_;
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_DIY_FP_H_
diff --git a/klm/util/double-conversion/double-conversion.cc b/klm/util/double-conversion/double-conversion.cc
new file mode 100644
index 00000000..febba6cd
--- /dev/null
+++ b/klm/util/double-conversion/double-conversion.cc
@@ -0,0 +1,889 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <limits.h>
+#include <math.h>
+
+#include "double-conversion.h"
+
+#include "bignum-dtoa.h"
+#include "fast-dtoa.h"
+#include "fixed-dtoa.h"
+#include "ieee.h"
+#include "strtod.h"
+#include "utils.h"
+
+namespace double_conversion {
+
+const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
+  int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
+  static DoubleToStringConverter converter(flags,
+                                           "Infinity",
+                                           "NaN",
+                                           'e',
+                                           -6, 21,
+                                           6, 0);
+  return converter;
+}
+
+
+bool DoubleToStringConverter::HandleSpecialValues(
+    double value,
+    StringBuilder* result_builder) const {
+  Double double_inspect(value);
+  if (double_inspect.IsInfinite()) {
+    if (infinity_symbol_ == NULL) return false;
+    if (value < 0) {
+      result_builder->AddCharacter('-');
+    }
+    result_builder->AddString(infinity_symbol_);
+    return true;
+  }
+  if (double_inspect.IsNan()) {
+    if (nan_symbol_ == NULL) return false;
+    result_builder->AddString(nan_symbol_);
+    return true;
+  }
+  return false;
+}
+
+
+void DoubleToStringConverter::CreateExponentialRepresentation(
+    const char* decimal_digits,
+    int length,
+    int exponent,
+    StringBuilder* result_builder) const {
+  ASSERT(length != 0);
+  result_builder->AddCharacter(decimal_digits[0]);
+  if (length != 1) {
+    result_builder->AddCharacter('.');
+    result_builder->AddSubstring(&decimal_digits[1], length-1);
+  }
+  result_builder->AddCharacter(exponent_character_);
+  if (exponent < 0) {
+    result_builder->AddCharacter('-');
+    exponent = -exponent;
+  } else {
+    if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
+      result_builder->AddCharacter('+');
+    }
+  }
+  if (exponent == 0) {
+    result_builder->AddCharacter('0');
+    return;
+  }
+  ASSERT(exponent < 1e4);
+  const int kMaxExponentLength = 5;
+  char buffer[kMaxExponentLength + 1];
+  buffer[kMaxExponentLength] = '\0';
+  int first_char_pos = kMaxExponentLength;
+  while (exponent > 0) {
+    buffer[--first_char_pos] = '0' + (exponent % 10);
+    exponent /= 10;
+  }
+  result_builder->AddSubstring(&buffer[first_char_pos],
+                               kMaxExponentLength - first_char_pos);
+}
+
+
+void DoubleToStringConverter::CreateDecimalRepresentation(
+    const char* decimal_digits,
+    int length,
+    int decimal_point,
+    int digits_after_point,
+    StringBuilder* result_builder) const {
+  // Create a representation that is padded with zeros if needed.
+  if (decimal_point <= 0) {
+      // "0.00000decimal_rep".
+    result_builder->AddCharacter('0');
+    if (digits_after_point > 0) {
+      result_builder->AddCharacter('.');
+      result_builder->AddPadding('0', -decimal_point);
+      ASSERT(length <= digits_after_point - (-decimal_point));
+      result_builder->AddSubstring(decimal_digits, length);
+      int remaining_digits = digits_after_point - (-decimal_point) - length;
+      result_builder->AddPadding('0', remaining_digits);
+    }
+  } else if (decimal_point >= length) {
+    // "decimal_rep0000.00000" or "decimal_rep.0000"
+    result_builder->AddSubstring(decimal_digits, length);
+    result_builder->AddPadding('0', decimal_point - length);
+    if (digits_after_point > 0) {
+      result_builder->AddCharacter('.');
+      result_builder->AddPadding('0', digits_after_point);
+    }
+  } else {
+    // "decima.l_rep000"
+    ASSERT(digits_after_point > 0);
+    result_builder->AddSubstring(decimal_digits, decimal_point);
+    result_builder->AddCharacter('.');
+    ASSERT(length - decimal_point <= digits_after_point);
+    result_builder->AddSubstring(&decimal_digits[decimal_point],
+                                 length - decimal_point);
+    int remaining_digits = digits_after_point - (length - decimal_point);
+    result_builder->AddPadding('0', remaining_digits);
+  }
+  if (digits_after_point == 0) {
+    if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
+      result_builder->AddCharacter('.');
+    }
+    if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
+      result_builder->AddCharacter('0');
+    }
+  }
+}
+
+
+bool DoubleToStringConverter::ToShortestIeeeNumber(
+    double value,
+    StringBuilder* result_builder,
+    DoubleToStringConverter::DtoaMode mode) const {
+  ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  int decimal_point;
+  bool sign;
+  const int kDecimalRepCapacity = kBase10MaximalLength + 1;
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+
+  DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
+                &sign, &decimal_rep_length, &decimal_point);
+
+  bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  int exponent = decimal_point - 1;
+  if ((decimal_in_shortest_low_ <= exponent) &&
+      (exponent < decimal_in_shortest_high_)) {
+    CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
+                                decimal_point,
+                                Max(0, decimal_rep_length - decimal_point),
+                                result_builder);
+  } else {
+    CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
+                                    result_builder);
+  }
+  return true;
+}
+
+
+bool DoubleToStringConverter::ToFixed(double value,
+                                      int requested_digits,
+                                      StringBuilder* result_builder) const {
+  ASSERT(kMaxFixedDigitsBeforePoint == 60);
+  const double kFirstNonFixed = 1e60;
+
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
+  if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
+
+  // Find a sufficiently precise decimal representation of n.
+  int decimal_point;
+  bool sign;
+  // Add space for the '\0' byte.
+  const int kDecimalRepCapacity =
+      kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+  DoubleToAscii(value, FIXED, requested_digits,
+                decimal_rep, kDecimalRepCapacity,
+                &sign, &decimal_rep_length, &decimal_point);
+
+  bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+                              requested_digits, result_builder);
+  return true;
+}
+
+
+bool DoubleToStringConverter::ToExponential(
+    double value,
+    int requested_digits,
+    StringBuilder* result_builder) const {
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  if (requested_digits < -1) return false;
+  if (requested_digits > kMaxExponentialDigits) return false;
+
+  int decimal_point;
+  bool sign;
+  // Add space for digit before the decimal point and the '\0' character.
+  const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
+  ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+
+  if (requested_digits == -1) {
+    DoubleToAscii(value, SHORTEST, 0,
+                  decimal_rep, kDecimalRepCapacity,
+                  &sign, &decimal_rep_length, &decimal_point);
+  } else {
+    DoubleToAscii(value, PRECISION, requested_digits + 1,
+                  decimal_rep, kDecimalRepCapacity,
+                  &sign, &decimal_rep_length, &decimal_point);
+    ASSERT(decimal_rep_length <= requested_digits + 1);
+
+    for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
+      decimal_rep[i] = '0';
+    }
+    decimal_rep_length = requested_digits + 1;
+  }
+
+  bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  int exponent = decimal_point - 1;
+  CreateExponentialRepresentation(decimal_rep,
+                                  decimal_rep_length,
+                                  exponent,
+                                  result_builder);
+  return true;
+}
+
+
+bool DoubleToStringConverter::ToPrecision(double value,
+                                          int precision,
+                                          StringBuilder* result_builder) const {
+  if (Double(value).IsSpecial()) {
+    return HandleSpecialValues(value, result_builder);
+  }
+
+  if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
+    return false;
+  }
+
+  // Find a sufficiently precise decimal representation of n.
+  int decimal_point;
+  bool sign;
+  // Add one for the terminating null character.
+  const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
+  char decimal_rep[kDecimalRepCapacity];
+  int decimal_rep_length;
+
+  DoubleToAscii(value, PRECISION, precision,
+                decimal_rep, kDecimalRepCapacity,
+                &sign, &decimal_rep_length, &decimal_point);
+  ASSERT(decimal_rep_length <= precision);
+
+  bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
+  if (sign && (value != 0.0 || !unique_zero)) {
+    result_builder->AddCharacter('-');
+  }
+
+  // The exponent if we print the number as x.xxeyyy. That is with the
+  // decimal point after the first digit.
+  int exponent = decimal_point - 1;
+
+  int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
+  if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
+      (decimal_point - precision + extra_zero >
+       max_trailing_padding_zeroes_in_precision_mode_)) {
+    // Fill buffer to contain 'precision' digits.
+    // Usually the buffer is already at the correct length, but 'DoubleToAscii'
+    // is allowed to return less characters.
+    for (int i = decimal_rep_length; i < precision; ++i) {
+      decimal_rep[i] = '0';
+    }
+
+    CreateExponentialRepresentation(decimal_rep,
+                                    precision,
+                                    exponent,
+                                    result_builder);
+  } else {
+    CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
+                                Max(0, precision - decimal_point),
+                                result_builder);
+  }
+  return true;
+}
+
+
+static BignumDtoaMode DtoaToBignumDtoaMode(
+    DoubleToStringConverter::DtoaMode dtoa_mode) {
+  switch (dtoa_mode) {
+    case DoubleToStringConverter::SHORTEST:  return BIGNUM_DTOA_SHORTEST;
+    case DoubleToStringConverter::SHORTEST_SINGLE:
+        return BIGNUM_DTOA_SHORTEST_SINGLE;
+    case DoubleToStringConverter::FIXED:     return BIGNUM_DTOA_FIXED;
+    case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
+    default:
+      UNREACHABLE();
+      return BIGNUM_DTOA_SHORTEST;  // To silence compiler.
+  }
+}
+
+
+void DoubleToStringConverter::DoubleToAscii(double v,
+                                            DtoaMode mode,
+                                            int requested_digits,
+                                            char* buffer,
+                                            int buffer_length,
+                                            bool* sign,
+                                            int* length,
+                                            int* point) {
+  Vector<char> vector(buffer, buffer_length);
+  ASSERT(!Double(v).IsSpecial());
+  ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
+
+  if (Double(v).Sign() < 0) {
+    *sign = true;
+    v = -v;
+  } else {
+    *sign = false;
+  }
+
+  if (mode == PRECISION && requested_digits == 0) {
+    vector[0] = '\0';
+    *length = 0;
+    return;
+  }
+
+  if (v == 0) {
+    vector[0] = '0';
+    vector[1] = '\0';
+    *length = 1;
+    *point = 1;
+    return;
+  }
+
+  bool fast_worked;
+  switch (mode) {
+    case SHORTEST:
+      fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
+      break;
+    case SHORTEST_SINGLE:
+      fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
+                             vector, length, point);
+      break;
+    case FIXED:
+      fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
+      break;
+    case PRECISION:
+      fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
+                             vector, length, point);
+      break;
+    default:
+      UNREACHABLE();
+      fast_worked = false;
+  }
+  if (fast_worked) return;
+
+  // If the fast dtoa didn't succeed use the slower bignum version.
+  BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
+  BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
+  vector[*length] = '\0';
+}
+
+
+// Consumes the given substring from the iterator.
+// Returns false, if the substring does not match.
+static bool ConsumeSubString(const char** current,
+                             const char* end,
+                             const char* substring) {
+  ASSERT(**current == *substring);
+  for (substring++; *substring != '\0'; substring++) {
+    ++*current;
+    if (*current == end || **current != *substring) return false;
+  }
+  ++*current;
+  return true;
+}
+
+
+// Maximum number of significant digits in decimal representation.
+// The longest possible double in decimal representation is
+// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
+// (768 digits). If we parse a number whose first digits are equal to a
+// mean of 2 adjacent doubles (that could have up to 769 digits) the result
+// must be rounded to the bigger one unless the tail consists of zeros, so
+// we don't need to preserve all the digits.
+const int kMaxSignificantDigits = 772;
+
+
+// Returns true if a nonspace found and false if the end has reached.
+static inline bool AdvanceToNonspace(const char** current, const char* end) {
+  while (*current != end) {
+    if (**current != ' ') return true;
+    ++*current;
+  }
+  return false;
+}
+
+
+static bool isDigit(int x, int radix) {
+  return (x >= '0' && x <= '9' && x < '0' + radix)
+      || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
+      || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
+}
+
+
+static double SignedZero(bool sign) {
+  return sign ? -0.0 : 0.0;
+}
+
+
+// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
+template <int radix_log_2>
+static double RadixStringToIeee(const char* current,
+                                const char* end,
+                                bool sign,
+                                bool allow_trailing_junk,
+                                double junk_string_value,
+                                bool read_as_double,
+                                const char** trailing_pointer) {
+  ASSERT(current != end);
+
+  const int kDoubleSize = Double::kSignificandSize;
+  const int kSingleSize = Single::kSignificandSize;
+  const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
+
+  // Skip leading 0s.
+  while (*current == '0') {
+    ++current;
+    if (current == end) {
+      *trailing_pointer = end;
+      return SignedZero(sign);
+    }
+  }
+
+  int64_t number = 0;
+  int exponent = 0;
+  const int radix = (1 << radix_log_2);
+
+  do {
+    int digit;
+    if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
+      digit = static_cast<char>(*current) - '0';
+    } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
+      digit = static_cast<char>(*current) - 'a' + 10;
+    } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
+      digit = static_cast<char>(*current) - 'A' + 10;
+    } else {
+      if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) {
+        break;
+      } else {
+        return junk_string_value;
+      }
+    }
+
+    number = number * radix + digit;
+    int overflow = static_cast<int>(number >> kSignificandSize);
+    if (overflow != 0) {
+      // Overflow occurred. Need to determine which direction to round the
+      // result.
+      int overflow_bits_count = 1;
+      while (overflow > 1) {
+        overflow_bits_count++;
+        overflow >>= 1;
+      }
+
+      int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
+      int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
+      number >>= overflow_bits_count;
+      exponent = overflow_bits_count;
+
+      bool zero_tail = true;
+      while (true) {
+        ++current;
+        if (current == end || !isDigit(*current, radix)) break;
+        zero_tail = zero_tail && *current == '0';
+        exponent += radix_log_2;
+      }
+
+      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+        return junk_string_value;
+      }
+
+      int middle_value = (1 << (overflow_bits_count - 1));
+      if (dropped_bits > middle_value) {
+        number++;  // Rounding up.
+      } else if (dropped_bits == middle_value) {
+        // Rounding to even to consistency with decimals: half-way case rounds
+        // up if significant part is odd and down otherwise.
+        if ((number & 1) != 0 || !zero_tail) {
+          number++;  // Rounding up.
+        }
+      }
+
+      // Rounding up may cause overflow.
+      if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
+        exponent++;
+        number >>= 1;
+      }
+      break;
+    }
+    ++current;
+  } while (current != end);
+
+  ASSERT(number < ((int64_t)1 << kSignificandSize));
+  ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
+
+  *trailing_pointer = current;
+
+  if (exponent == 0) {
+    if (sign) {
+      if (number == 0) return -0.0;
+      number = -number;
+    }
+    return static_cast<double>(number);
+  }
+
+  ASSERT(number != 0);
+  return Double(DiyFp(number, exponent)).value();
+}
+
+
+double StringToDoubleConverter::StringToIeee(
+    const char* input,
+    int length,
+    int* processed_characters_count,
+    bool read_as_double) const {
+  const char* current = input;
+  const char* end = input + length;
+
+  *processed_characters_count = 0;
+
+  const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
+  const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
+  const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
+  const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
+
+  // To make sure that iterator dereferencing is valid the following
+  // convention is used:
+  // 1. Each '++current' statement is followed by check for equality to 'end'.
+  // 2. If AdvanceToNonspace returned false then current == end.
+  // 3. If 'current' becomes equal to 'end' the function returns or goes to
+  // 'parsing_done'.
+  // 4. 'current' is not dereferenced after the 'parsing_done' label.
+  // 5. Code before 'parsing_done' may rely on 'current != end'.
+  if (current == end) return empty_string_value_;
+
+  if (allow_leading_spaces || allow_trailing_spaces) {
+    if (!AdvanceToNonspace(&current, end)) {
+      *processed_characters_count = current - input;
+      return empty_string_value_;
+    }
+    if (!allow_leading_spaces && (input != current)) {
+      // No leading spaces allowed, but AdvanceToNonspace moved forward.
+      return junk_string_value_;
+    }
+  }
+
+  // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
+  const int kBufferSize = kMaxSignificantDigits + 10;
+  char buffer[kBufferSize];  // NOLINT: size is known at compile time.
+  int buffer_pos = 0;
+
+  // Exponent will be adjusted if insignificant digits of the integer part
+  // or insignificant leading zeros of the fractional part are dropped.
+  int exponent = 0;
+  int significant_digits = 0;
+  int insignificant_digits = 0;
+  bool nonzero_digit_dropped = false;
+
+  bool sign = false;
+
+  if (*current == '+' || *current == '-') {
+    sign = (*current == '-');
+    ++current;
+    const char* next_non_space = current;
+    // Skip following spaces (if allowed).
+    if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
+    if (!allow_spaces_after_sign && (current != next_non_space)) {
+      return junk_string_value_;
+    }
+    current = next_non_space;
+  }
+
+  if (infinity_symbol_ != NULL) {
+    if (*current == infinity_symbol_[0]) {
+      if (!ConsumeSubString(&current, end, infinity_symbol_)) {
+        return junk_string_value_;
+      }
+
+      if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+        return junk_string_value_;
+      }
+      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+        return junk_string_value_;
+      }
+
+      ASSERT(buffer_pos == 0);
+      *processed_characters_count = current - input;
+      return sign ? -Double::Infinity() : Double::Infinity();
+    }
+  }
+
+  if (nan_symbol_ != NULL) {
+    if (*current == nan_symbol_[0]) {
+      if (!ConsumeSubString(&current, end, nan_symbol_)) {
+        return junk_string_value_;
+      }
+
+      if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+        return junk_string_value_;
+      }
+      if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+        return junk_string_value_;
+      }
+
+      ASSERT(buffer_pos == 0);
+      *processed_characters_count = current - input;
+      return sign ? -Double::NaN() : Double::NaN();
+    }
+  }
+
+  bool leading_zero = false;
+  if (*current == '0') {
+    ++current;
+    if (current == end) {
+      *processed_characters_count = current - input;
+      return SignedZero(sign);
+    }
+
+    leading_zero = true;
+
+    // It could be hexadecimal value.
+    if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
+      ++current;
+      if (current == end || !isDigit(*current, 16)) {
+        return junk_string_value_;  // "0x".
+      }
+
+      const char* tail_pointer = NULL;
+      double result = RadixStringToIeee<4>(current,
+                                           end,
+                                           sign,
+                                           allow_trailing_junk,
+                                           junk_string_value_,
+                                           read_as_double,
+                                           &tail_pointer);
+      if (tail_pointer != NULL) {
+        if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
+        *processed_characters_count = tail_pointer - input;
+      }
+      return result;
+    }
+
+    // Ignore leading zeros in the integer part.
+    while (*current == '0') {
+      ++current;
+      if (current == end) {
+        *processed_characters_count = current - input;
+        return SignedZero(sign);
+      }
+    }
+  }
+
+  bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
+
+  // Copy significant digits of the integer part (if any) to the buffer.
+  while (*current >= '0' && *current <= '9') {
+    if (significant_digits < kMaxSignificantDigits) {
+      ASSERT(buffer_pos < kBufferSize);
+      buffer[buffer_pos++] = static_cast<char>(*current);
+      significant_digits++;
+      // Will later check if it's an octal in the buffer.
+    } else {
+      insignificant_digits++;  // Move the digit into the exponential part.
+      nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+    }
+    octal = octal && *current < '8';
+    ++current;
+    if (current == end) goto parsing_done;
+  }
+
+  if (significant_digits == 0) {
+    octal = false;
+  }
+
+  if (*current == '.') {
+    if (octal && !allow_trailing_junk) return junk_string_value_;
+    if (octal) goto parsing_done;
+
+    ++current;
+    if (current == end) {
+      if (significant_digits == 0 && !leading_zero) {
+        return junk_string_value_;
+      } else {
+        goto parsing_done;
+      }
+    }
+
+    if (significant_digits == 0) {
+      // octal = false;
+      // Integer part consists of 0 or is absent. Significant digits start after
+      // leading zeros (if any).
+      while (*current == '0') {
+        ++current;
+        if (current == end) {
+          *processed_characters_count = current - input;
+          return SignedZero(sign);
+        }
+        exponent--;  // Move this 0 into the exponent.
+      }
+    }
+
+    // There is a fractional part.
+    // We don't emit a '.', but adjust the exponent instead.
+    while (*current >= '0' && *current <= '9') {
+      if (significant_digits < kMaxSignificantDigits) {
+        ASSERT(buffer_pos < kBufferSize);
+        buffer[buffer_pos++] = static_cast<char>(*current);
+        significant_digits++;
+        exponent--;
+      } else {
+        // Ignore insignificant digits in the fractional part.
+        nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
+      }
+      ++current;
+      if (current == end) goto parsing_done;
+    }
+  }
+
+  if (!leading_zero && exponent == 0 && significant_digits == 0) {
+    // If leading_zeros is true then the string contains zeros.
+    // If exponent < 0 then string was [+-]\.0*...
+    // If significant_digits != 0 the string is not equal to 0.
+    // Otherwise there are no digits in the string.
+    return junk_string_value_;
+  }
+
+  // Parse exponential part.
+  if (*current == 'e' || *current == 'E') {
+    if (octal && !allow_trailing_junk) return junk_string_value_;
+    if (octal) goto parsing_done;
+    ++current;
+    if (current == end) {
+      if (allow_trailing_junk) {
+        goto parsing_done;
+      } else {
+        return junk_string_value_;
+      }
+    }
+    char sign = '+';
+    if (*current == '+' || *current == '-') {
+      sign = static_cast<char>(*current);
+      ++current;
+      if (current == end) {
+        if (allow_trailing_junk) {
+          goto parsing_done;
+        } else {
+          return junk_string_value_;
+        }
+      }
+    }
+
+    if (current == end || *current < '0' || *current > '9') {
+      if (allow_trailing_junk) {
+        goto parsing_done;
+      } else {
+        return junk_string_value_;
+      }
+    }
+
+    const int max_exponent = INT_MAX / 2;
+    ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
+    int num = 0;
+    do {
+      // Check overflow.
+      int digit = *current - '0';
+      if (num >= max_exponent / 10
+          && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
+        num = max_exponent;
+      } else {
+        num = num * 10 + digit;
+      }
+      ++current;
+    } while (current != end && *current >= '0' && *current <= '9');
+
+    exponent += (sign == '-' ? -num : num);
+  }
+
+  if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
+    return junk_string_value_;
+  }
+  if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) {
+    return junk_string_value_;
+  }
+  if (allow_trailing_spaces) {
+    AdvanceToNonspace(&current, end);
+  }
+
+  parsing_done:
+  exponent += insignificant_digits;
+
+  if (octal) {
+    double result;
+    const char* tail_pointer = NULL;
+    result = RadixStringToIeee<3>(buffer,
+                                  buffer + buffer_pos,
+                                  sign,
+                                  allow_trailing_junk,
+                                  junk_string_value_,
+                                  read_as_double,
+                                  &tail_pointer);
+    ASSERT(tail_pointer != NULL);
+    *processed_characters_count = current - input;
+    return result;
+  }
+
+  if (nonzero_digit_dropped) {
+    buffer[buffer_pos++] = '1';
+    exponent--;
+  }
+
+  ASSERT(buffer_pos < kBufferSize);
+  buffer[buffer_pos] = '\0';
+
+  double converted;
+  if (read_as_double) {
+    converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
+  } else {
+    converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
+  }
+  *processed_characters_count = current - input;
+  return sign? -converted: converted;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/double-conversion.h b/klm/util/double-conversion/double-conversion.h
new file mode 100644
index 00000000..1c3387d4
--- /dev/null
+++ b/klm/util/double-conversion/double-conversion.h
@@ -0,0 +1,536 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+class DoubleToStringConverter {
+ public:
+  // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint
+  // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the
+  // function returns false.
+  static const int kMaxFixedDigitsBeforePoint = 60;
+  static const int kMaxFixedDigitsAfterPoint = 60;
+
+  // When calling ToExponential with a requested_digits
+  // parameter > kMaxExponentialDigits then the function returns false.
+  static const int kMaxExponentialDigits = 120;
+
+  // When calling ToPrecision with a requested_digits
+  // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits
+  // then the function returns false.
+  static const int kMinPrecisionDigits = 1;
+  static const int kMaxPrecisionDigits = 120;
+
+  enum Flags {
+    NO_FLAGS = 0,
+    EMIT_POSITIVE_EXPONENT_SIGN = 1,
+    EMIT_TRAILING_DECIMAL_POINT = 2,
+    EMIT_TRAILING_ZERO_AFTER_POINT = 4,
+    UNIQUE_ZERO = 8
+  };
+
+  // Flags should be a bit-or combination of the possible Flags-enum.
+  //  - NO_FLAGS: no special flags.
+  //  - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent
+  //    form, emits a '+' for positive exponents. Example: 1.2e+2.
+  //  - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is
+  //    converted into decimal format then a trailing decimal point is appended.
+  //    Example: 2345.0 is converted to "2345.".
+  //  - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point
+  //    emits a trailing '0'-character. This flag requires the
+  //    EXMIT_TRAILING_DECIMAL_POINT flag.
+  //    Example: 2345.0 is converted to "2345.0".
+  //  - UNIQUE_ZERO: "-0.0" is converted to "0.0".
+  //
+  // Infinity symbol and nan_symbol provide the string representation for these
+  // special values. If the string is NULL and the special value is encountered
+  // then the conversion functions return false.
+  //
+  // The exponent_character is used in exponential representations. It is
+  // usually 'e' or 'E'.
+  //
+  // When converting to the shortest representation the converter will
+  // represent input numbers in decimal format if they are in the interval
+  // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[
+  //    (lower boundary included, greater boundary excluded).
+  // Example: with decimal_in_shortest_low = -6 and
+  //               decimal_in_shortest_high = 21:
+  //   ToShortest(0.000001)  -> "0.000001"
+  //   ToShortest(0.0000001) -> "1e-7"
+  //   ToShortest(111111111111111111111.0)  -> "111111111111111110000"
+  //   ToShortest(100000000000000000000.0)  -> "100000000000000000000"
+  //   ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
+  //
+  // When converting to precision mode the converter may add
+  // max_leading_padding_zeroes before returning the number in exponential
+  // format.
+  // Example with max_leading_padding_zeroes_in_precision_mode = 6.
+  //   ToPrecision(0.0000012345, 2) -> "0.0000012"
+  //   ToPrecision(0.00000012345, 2) -> "1.2e-7"
+  // Similarily the converter may add up to
+  // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
+  // returning an exponential representation. A zero added by the
+  // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
+  // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
+  //   ToPrecision(230.0, 2) -> "230"
+  //   ToPrecision(230.0, 2) -> "230."  with EMIT_TRAILING_DECIMAL_POINT.
+  //   ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+  DoubleToStringConverter(int flags,
+                          const char* infinity_symbol,
+                          const char* nan_symbol,
+                          char exponent_character,
+                          int decimal_in_shortest_low,
+                          int decimal_in_shortest_high,
+                          int max_leading_padding_zeroes_in_precision_mode,
+                          int max_trailing_padding_zeroes_in_precision_mode)
+      : flags_(flags),
+        infinity_symbol_(infinity_symbol),
+        nan_symbol_(nan_symbol),
+        exponent_character_(exponent_character),
+        decimal_in_shortest_low_(decimal_in_shortest_low),
+        decimal_in_shortest_high_(decimal_in_shortest_high),
+        max_leading_padding_zeroes_in_precision_mode_(
+            max_leading_padding_zeroes_in_precision_mode),
+        max_trailing_padding_zeroes_in_precision_mode_(
+            max_trailing_padding_zeroes_in_precision_mode) {
+    // When 'trailing zero after the point' is set, then 'trailing point'
+    // must be set too.
+    ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) ||
+        !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0));
+  }
+
+  // Returns a converter following the EcmaScript specification.
+  static const DoubleToStringConverter& EcmaScriptConverter();
+
+  // Computes the shortest string of digits that correctly represent the input
+  // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high
+  // (see constructor) it then either returns a decimal representation, or an
+  // exponential representation.
+  // Example with decimal_in_shortest_low = -6,
+  //              decimal_in_shortest_high = 21,
+  //              EMIT_POSITIVE_EXPONENT_SIGN activated, and
+  //              EMIT_TRAILING_DECIMAL_POINT deactived:
+  //   ToShortest(0.000001)  -> "0.000001"
+  //   ToShortest(0.0000001) -> "1e-7"
+  //   ToShortest(111111111111111111111.0)  -> "111111111111111110000"
+  //   ToShortest(100000000000000000000.0)  -> "100000000000000000000"
+  //   ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
+  //
+  // Note: the conversion may round the output if the returned string
+  // is accurate enough to uniquely identify the input-number.
+  // For example the most precise representation of the double 9e59 equals
+  // "899999999999999918767229449717619953810131273674690656206848", but
+  // the converter will return the shorter (but still correct) "9e59".
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except when the input value is special and no infinity_symbol or
+  // nan_symbol has been given to the constructor.
+  bool ToShortest(double value, StringBuilder* result_builder) const {
+    return ToShortestIeeeNumber(value, result_builder, SHORTEST);
+  }
+
+  // Same as ToShortest, but for single-precision floats.
+  bool ToShortestSingle(float value, StringBuilder* result_builder) const {
+    return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE);
+  }
+
+
+  // Computes a decimal representation with a fixed number of digits after the
+  // decimal point. The last emitted digit is rounded.
+  //
+  // Examples:
+  //   ToFixed(3.12, 1) -> "3.1"
+  //   ToFixed(3.1415, 3) -> "3.142"
+  //   ToFixed(1234.56789, 4) -> "1234.5679"
+  //   ToFixed(1.23, 5) -> "1.23000"
+  //   ToFixed(0.1, 4) -> "0.1000"
+  //   ToFixed(1e30, 2) -> "1000000000000000019884624838656.00"
+  //   ToFixed(0.1, 30) -> "0.100000000000000005551115123126"
+  //   ToFixed(0.1, 17) -> "0.10000000000000001"
+  //
+  // If requested_digits equals 0, then the tail of the result depends on
+  // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT.
+  // Examples, for requested_digits == 0,
+  //   let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be
+  //    - false and false: then 123.45 -> 123
+  //                             0.678 -> 1
+  //    - true and false: then 123.45 -> 123.
+  //                            0.678 -> 1.
+  //    - true and true: then 123.45 -> 123.0
+  //                           0.678 -> 1.0
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except for the following cases:
+  //   - the input value is special and no infinity_symbol or nan_symbol has
+  //     been provided to the constructor,
+  //   - 'value' > 10^kMaxFixedDigitsBeforePoint, or
+  //   - 'requested_digits' > kMaxFixedDigitsAfterPoint.
+  // The last two conditions imply that the result will never contain more than
+  // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
+  // (one additional character for the sign, and one for the decimal point).
+  bool ToFixed(double value,
+               int requested_digits,
+               StringBuilder* result_builder) const;
+
+  // Computes a representation in exponential format with requested_digits
+  // after the decimal point. The last emitted digit is rounded.
+  // If requested_digits equals -1, then the shortest exponential representation
+  // is computed.
+  //
+  // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and
+  //               exponent_character set to 'e'.
+  //   ToExponential(3.12, 1) -> "3.1e0"
+  //   ToExponential(5.0, 3) -> "5.000e0"
+  //   ToExponential(0.001, 2) -> "1.00e-3"
+  //   ToExponential(3.1415, -1) -> "3.1415e0"
+  //   ToExponential(3.1415, 4) -> "3.1415e0"
+  //   ToExponential(3.1415, 3) -> "3.142e0"
+  //   ToExponential(123456789000000, 3) -> "1.235e14"
+  //   ToExponential(1000000000000000019884624838656.0, -1) -> "1e30"
+  //   ToExponential(1000000000000000019884624838656.0, 32) ->
+  //                     "1.00000000000000001988462483865600e30"
+  //   ToExponential(1234, 0) -> "1e3"
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except for the following cases:
+  //   - the input value is special and no infinity_symbol or nan_symbol has
+  //     been provided to the constructor,
+  //   - 'requested_digits' > kMaxExponentialDigits.
+  // The last condition implies that the result will never contain more than
+  // kMaxExponentialDigits + 8 characters (the sign, the digit before the
+  // decimal point, the decimal point, the exponent character, the
+  // exponent's sign, and at most 3 exponent digits).
+  bool ToExponential(double value,
+                     int requested_digits,
+                     StringBuilder* result_builder) const;
+
+  // Computes 'precision' leading digits of the given 'value' and returns them
+  // either in exponential or decimal format, depending on
+  // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the
+  // constructor).
+  // The last computed digit is rounded.
+  //
+  // Example with max_leading_padding_zeroes_in_precision_mode = 6.
+  //   ToPrecision(0.0000012345, 2) -> "0.0000012"
+  //   ToPrecision(0.00000012345, 2) -> "1.2e-7"
+  // Similarily the converter may add up to
+  // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
+  // returning an exponential representation. A zero added by the
+  // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
+  // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
+  //   ToPrecision(230.0, 2) -> "230"
+  //   ToPrecision(230.0, 2) -> "230."  with EMIT_TRAILING_DECIMAL_POINT.
+  //   ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
+  // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no
+  //    EMIT_TRAILING_ZERO_AFTER_POINT:
+  //   ToPrecision(123450.0, 6) -> "123450"
+  //   ToPrecision(123450.0, 5) -> "123450"
+  //   ToPrecision(123450.0, 4) -> "123500"
+  //   ToPrecision(123450.0, 3) -> "123000"
+  //   ToPrecision(123450.0, 2) -> "1.2e5"
+  //
+  // Returns true if the conversion succeeds. The conversion always succeeds
+  // except for the following cases:
+  //   - the input value is special and no infinity_symbol or nan_symbol has
+  //     been provided to the constructor,
+  //   - precision < kMinPericisionDigits
+  //   - precision > kMaxPrecisionDigits
+  // The last condition implies that the result will never contain more than
+  // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the
+  // exponent character, the exponent's sign, and at most 3 exponent digits).
+  bool ToPrecision(double value,
+                   int precision,
+                   StringBuilder* result_builder) const;
+
+  enum DtoaMode {
+    // Produce the shortest correct representation.
+    // For example the output of 0.299999999999999988897 is (the less accurate
+    // but correct) 0.3.
+    SHORTEST,
+    // Same as SHORTEST, but for single-precision floats.
+    SHORTEST_SINGLE,
+    // Produce a fixed number of digits after the decimal point.
+    // For instance fixed(0.1, 4) becomes 0.1000
+    // If the input number is big, the output will be big.
+    FIXED,
+    // Fixed number of digits (independent of the decimal point).
+    PRECISION
+  };
+
+  // The maximal number of digits that are needed to emit a double in base 10.
+  // A higher precision can be achieved by using more digits, but the shortest
+  // accurate representation of any double will never use more digits than
+  // kBase10MaximalLength.
+  // Note that DoubleToAscii null-terminates its input. So the given buffer
+  // should be at least kBase10MaximalLength + 1 characters long.
+  static const int kBase10MaximalLength = 17;
+
+  // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or
+  // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v'
+  // after it has been casted to a single-precision float. That is, in this
+  // mode static_cast<float>(v) must not be NaN, +Infinity or -Infinity.
+  //
+  // The result should be interpreted as buffer * 10^(point-length).
+  //
+  // The output depends on the given mode:
+  //  - SHORTEST: produce the least amount of digits for which the internal
+  //   identity requirement is still satisfied. If the digits are printed
+  //   (together with the correct exponent) then reading this number will give
+  //   'v' again. The buffer will choose the representation that is closest to
+  //   'v'. If there are two at the same distance, than the one farther away
+  //   from 0 is chosen (halfway cases - ending with 5 - are rounded up).
+  //   In this mode the 'requested_digits' parameter is ignored.
+  //  - SHORTEST_SINGLE: same as SHORTEST but with single-precision.
+  //  - FIXED: produces digits necessary to print a given number with
+  //   'requested_digits' digits after the decimal point. The produced digits
+  //   might be too short in which case the caller has to fill the remainder
+  //   with '0's.
+  //   Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
+  //   Halfway cases are rounded towards +/-Infinity (away from 0). The call
+  //   toFixed(0.15, 2) thus returns buffer="2", point=0.
+  //   The returned buffer may contain digits that would be truncated from the
+  //   shortest representation of the input.
+  //  - PRECISION: produces 'requested_digits' where the first digit is not '0'.
+  //   Even though the length of produced digits usually equals
+  //   'requested_digits', the function is allowed to return fewer digits, in
+  //   which case the caller has to fill the missing digits with '0's.
+  //   Halfway cases are again rounded away from 0.
+  // DoubleToAscii expects the given buffer to be big enough to hold all
+  // digits and a terminating null-character. In SHORTEST-mode it expects a
+  // buffer of at least kBase10MaximalLength + 1. In all other modes the
+  // requested_digits parameter and the padding-zeroes limit the size of the
+  // output. Don't forget the decimal point, the exponent character and the
+  // terminating null-character when computing the maximal output size.
+  // The given length is only used in debug mode to ensure the buffer is big
+  // enough.
+  static void DoubleToAscii(double v,
+                            DtoaMode mode,
+                            int requested_digits,
+                            char* buffer,
+                            int buffer_length,
+                            bool* sign,
+                            int* length,
+                            int* point);
+
+ private:
+  // Implementation for ToShortest and ToShortestSingle.
+  bool ToShortestIeeeNumber(double value,
+                            StringBuilder* result_builder,
+                            DtoaMode mode) const;
+
+  // If the value is a special value (NaN or Infinity) constructs the
+  // corresponding string using the configured infinity/nan-symbol.
+  // If either of them is NULL or the value is not special then the
+  // function returns false.
+  bool HandleSpecialValues(double value, StringBuilder* result_builder) const;
+  // Constructs an exponential representation (i.e. 1.234e56).
+  // The given exponent assumes a decimal point after the first decimal digit.
+  void CreateExponentialRepresentation(const char* decimal_digits,
+                                       int length,
+                                       int exponent,
+                                       StringBuilder* result_builder) const;
+  // Creates a decimal representation (i.e 1234.5678).
+  void CreateDecimalRepresentation(const char* decimal_digits,
+                                   int length,
+                                   int decimal_point,
+                                   int digits_after_point,
+                                   StringBuilder* result_builder) const;
+
+  const int flags_;
+  const char* const infinity_symbol_;
+  const char* const nan_symbol_;
+  const char exponent_character_;
+  const int decimal_in_shortest_low_;
+  const int decimal_in_shortest_high_;
+  const int max_leading_padding_zeroes_in_precision_mode_;
+  const int max_trailing_padding_zeroes_in_precision_mode_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter);
+};
+
+
+class StringToDoubleConverter {
+ public:
+  // Enumeration for allowing octals and ignoring junk when converting
+  // strings to numbers.
+  enum Flags {
+    NO_FLAGS = 0,
+    ALLOW_HEX = 1,
+    ALLOW_OCTALS = 2,
+    ALLOW_TRAILING_JUNK = 4,
+    ALLOW_LEADING_SPACES = 8,
+    ALLOW_TRAILING_SPACES = 16,
+    ALLOW_SPACES_AFTER_SIGN = 32
+  };
+
+  // Flags should be a bit-or combination of the possible Flags-enum.
+  //  - NO_FLAGS: no special flags.
+  //  - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers.
+  //      Ex: StringToDouble("0x1234") -> 4660.0
+  //          In StringToDouble("0x1234.56") the characters ".56" are trailing
+  //          junk. The result of the call is hence dependent on
+  //          the ALLOW_TRAILING_JUNK flag and/or the junk value.
+  //      With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK,
+  //      the string will not be parsed as "0" followed by junk.
+  //
+  //  - ALLOW_OCTALS: recognizes the prefix "0" for octals:
+  //      If a sequence of octal digits starts with '0', then the number is
+  //      read as octal integer. Octal numbers may only be integers.
+  //      Ex: StringToDouble("01234") -> 668.0
+  //          StringToDouble("012349") -> 12349.0  // Not a sequence of octal
+  //                                               // digits.
+  //          In StringToDouble("01234.56") the characters ".56" are trailing
+  //          junk. The result of the call is hence dependent on
+  //          the ALLOW_TRAILING_JUNK flag and/or the junk value.
+  //          In StringToDouble("01234e56") the characters "e56" are trailing
+  //          junk, too.
+  //  - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
+  //      a double literal.
+  //  - ALLOW_LEADING_SPACES: skip over leading spaces.
+  //  - ALLOW_TRAILING_SPACES: ignore trailing spaces.
+  //  - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign.
+  //       Ex: StringToDouble("-   123.2") -> -123.2.
+  //           StringToDouble("+   123.2") -> 123.2
+  //
+  // empty_string_value is returned when an empty string is given as input.
+  // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string
+  // containing only spaces is converted to the 'empty_string_value', too.
+  //
+  // junk_string_value is returned when
+  //  a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not
+  //     part of a double-literal) is found.
+  //  b) ALLOW_TRAILING_JUNK is set, but the string does not start with a
+  //     double literal.
+  //
+  // infinity_symbol and nan_symbol are strings that are used to detect
+  // inputs that represent infinity and NaN. They can be null, in which case
+  // they are ignored.
+  // The conversion routine first reads any possible signs. Then it compares the
+  // following character of the input-string with the first character of
+  // the infinity, and nan-symbol. If either matches, the function assumes, that
+  // a match has been found, and expects the following input characters to match
+  // the remaining characters of the special-value symbol.
+  // This means that the following restrictions apply to special-value symbols:
+  //  - they must not start with signs ('+', or '-'),
+  //  - they must not have the same first character.
+  //  - they must not start with digits.
+  //
+  // Examples:
+  //  flags = ALLOW_HEX | ALLOW_TRAILING_JUNK,
+  //  empty_string_value = 0.0,
+  //  junk_string_value = NaN,
+  //  infinity_symbol = "infinity",
+  //  nan_symbol = "nan":
+  //    StringToDouble("0x1234") -> 4660.0.
+  //    StringToDouble("0x1234K") -> 4660.0.
+  //    StringToDouble("") -> 0.0  // empty_string_value.
+  //    StringToDouble(" ") -> NaN  // junk_string_value.
+  //    StringToDouble(" 1") -> NaN  // junk_string_value.
+  //    StringToDouble("0x") -> NaN  // junk_string_value.
+  //    StringToDouble("-123.45") -> -123.45.
+  //    StringToDouble("--123.45") -> NaN  // junk_string_value.
+  //    StringToDouble("123e45") -> 123e45.
+  //    StringToDouble("123E45") -> 123e45.
+  //    StringToDouble("123e+45") -> 123e45.
+  //    StringToDouble("123E-45") -> 123e-45.
+  //    StringToDouble("123e") -> 123.0  // trailing junk ignored.
+  //    StringToDouble("123e-") -> 123.0  // trailing junk ignored.
+  //    StringToDouble("+NaN") -> NaN  // NaN string literal.
+  //    StringToDouble("-infinity") -> -inf.  // infinity literal.
+  //    StringToDouble("Infinity") -> NaN  // junk_string_value.
+  //
+  //  flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES,
+  //  empty_string_value = 0.0,
+  //  junk_string_value = NaN,
+  //  infinity_symbol = NULL,
+  //  nan_symbol = NULL:
+  //    StringToDouble("0x1234") -> NaN  // junk_string_value.
+  //    StringToDouble("01234") -> 668.0.
+  //    StringToDouble("") -> 0.0  // empty_string_value.
+  //    StringToDouble(" ") -> 0.0  // empty_string_value.
+  //    StringToDouble(" 1") -> 1.0
+  //    StringToDouble("0x") -> NaN  // junk_string_value.
+  //    StringToDouble("0123e45") -> NaN  // junk_string_value.
+  //    StringToDouble("01239E45") -> 1239e45.
+  //    StringToDouble("-infinity") -> NaN  // junk_string_value.
+  //    StringToDouble("NaN") -> NaN  // junk_string_value.
+  StringToDoubleConverter(int flags,
+                          double empty_string_value,
+                          double junk_string_value,
+                          const char* infinity_symbol,
+                          const char* nan_symbol)
+      : flags_(flags),
+        empty_string_value_(empty_string_value),
+        junk_string_value_(junk_string_value),
+        infinity_symbol_(infinity_symbol),
+        nan_symbol_(nan_symbol) {
+  }
+
+  // Performs the conversion.
+  // The output parameter 'processed_characters_count' is set to the number
+  // of characters that have been processed to read the number.
+  // Spaces than are processed with ALLOW_{LEADING|TRAILING}_SPACES are included
+  // in the 'processed_characters_count'. Trailing junk is never included.
+  double StringToDouble(const char* buffer,
+                        int length,
+                        int* processed_characters_count) const {
+    return StringToIeee(buffer, length, processed_characters_count, true);
+  }
+
+  // Same as StringToDouble but reads a float.
+  // Note that this is not equivalent to static_cast<float>(StringToDouble(...))
+  // due to potential double-rounding.
+  float StringToFloat(const char* buffer,
+                      int length,
+                      int* processed_characters_count) const {
+    return static_cast<float>(StringToIeee(buffer, length,
+                                           processed_characters_count, false));
+  }
+
+ private:
+  const int flags_;
+  const double empty_string_value_;
+  const double junk_string_value_;
+  const char* const infinity_symbol_;
+  const char* const nan_symbol_;
+
+  double StringToIeee(const char* buffer,
+                      int length,
+                      int* processed_characters_count,
+                      bool read_as_double) const;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
diff --git a/klm/util/double-conversion/fast-dtoa.cc b/klm/util/double-conversion/fast-dtoa.cc
new file mode 100644
index 00000000..1a0f8235
--- /dev/null
+++ b/klm/util/double-conversion/fast-dtoa.cc
@@ -0,0 +1,664 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "fast-dtoa.h"
+
+#include "cached-powers.h"
+#include "diy-fp.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// The minimal and maximal target exponent define the range of w's binary
+// exponent, where 'w' is the result of multiplying the input by a cached power
+// of ten.
+//
+// A different range might be chosen on a different platform, to optimize digit
+// generation, but a smaller range requires more powers of ten to be cached.
+static const int kMinimalTargetExponent = -60;
+static const int kMaximalTargetExponent = -32;
+
+
+// Adjusts the last digit of the generated number, and screens out generated
+// solutions that may be inaccurate. A solution may be inaccurate if it is
+// outside the safe interval, or if we cannot prove that it is closer to the
+// input than a neighboring representation of the same length.
+//
+// Input: * buffer containing the digits of too_high / 10^kappa
+//        * the buffer's length
+//        * distance_too_high_w == (too_high - w).f() * unit
+//        * unsafe_interval == (too_high - too_low).f() * unit
+//        * rest = (too_high - buffer * 10^kappa).f() * unit
+//        * ten_kappa = 10^kappa * unit
+//        * unit = the common multiplier
+// Output: returns true if the buffer is guaranteed to contain the closest
+//    representable number to the input.
+//  Modifies the generated digits in the buffer to approach (round towards) w.
+static bool RoundWeed(Vector<char> buffer,
+                      int length,
+                      uint64_t distance_too_high_w,
+                      uint64_t unsafe_interval,
+                      uint64_t rest,
+                      uint64_t ten_kappa,
+                      uint64_t unit) {
+  uint64_t small_distance = distance_too_high_w - unit;
+  uint64_t big_distance = distance_too_high_w + unit;
+  // Let w_low  = too_high - big_distance, and
+  //     w_high = too_high - small_distance.
+  // Note: w_low < w < w_high
+  //
+  // The real w (* unit) must lie somewhere inside the interval
+  // ]w_low; w_high[ (often written as "(w_low; w_high)")
+
+  // Basically the buffer currently contains a number in the unsafe interval
+  // ]too_low; too_high[ with too_low < w < too_high
+  //
+  //  too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  //                     ^v 1 unit            ^      ^                 ^      ^
+  //  boundary_high ---------------------     .      .                 .      .
+  //                     ^v 1 unit            .      .                 .      .
+  //   - - - - - - - - - - - - - - - - - - -  +  - - + - - - - - -     .      .
+  //                                          .      .         ^       .      .
+  //                                          .  big_distance  .       .      .
+  //                                          .      .         .       .    rest
+  //                              small_distance     .         .       .      .
+  //                                          v      .         .       .      .
+  //  w_high - - - - - - - - - - - - - - - - - -     .         .       .      .
+  //                     ^v 1 unit                   .         .       .      .
+  //  w ----------------------------------------     .         .       .      .
+  //                     ^v 1 unit                   v         .       .      .
+  //  w_low  - - - - - - - - - - - - - - - - - - - - -         .       .      .
+  //                                                           .       .      v
+  //  buffer --------------------------------------------------+-------+--------
+  //                                                           .       .
+  //                                                  safe_interval    .
+  //                                                           v       .
+  //   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -     .
+  //                     ^v 1 unit                                     .
+  //  boundary_low -------------------------                     unsafe_interval
+  //                     ^v 1 unit                                     v
+  //  too_low  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+  //
+  //
+  // Note that the value of buffer could lie anywhere inside the range too_low
+  // to too_high.
+  //
+  // boundary_low, boundary_high and w are approximations of the real boundaries
+  // and v (the input number). They are guaranteed to be precise up to one unit.
+  // In fact the error is guaranteed to be strictly less than one unit.
+  //
+  // Anything that lies outside the unsafe interval is guaranteed not to round
+  // to v when read again.
+  // Anything that lies inside the safe interval is guaranteed to round to v
+  // when read again.
+  // If the number inside the buffer lies inside the unsafe interval but not
+  // inside the safe interval then we simply do not know and bail out (returning
+  // false).
+  //
+  // Similarly we have to take into account the imprecision of 'w' when finding
+  // the closest representation of 'w'. If we have two potential
+  // representations, and one is closer to both w_low and w_high, then we know
+  // it is closer to the actual value v.
+  //
+  // By generating the digits of too_high we got the largest (closest to
+  // too_high) buffer that is still in the unsafe interval. In the case where
+  // w_high < buffer < too_high we try to decrement the buffer.
+  // This way the buffer approaches (rounds towards) w.
+  // There are 3 conditions that stop the decrementation process:
+  //   1) the buffer is already below w_high
+  //   2) decrementing the buffer would make it leave the unsafe interval
+  //   3) decrementing the buffer would yield a number below w_high and farther
+  //      away than the current number. In other words:
+  //              (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high
+  // Instead of using the buffer directly we use its distance to too_high.
+  // Conceptually rest ~= too_high - buffer
+  // We need to do the following tests in this order to avoid over- and
+  // underflows.
+  ASSERT(rest <= unsafe_interval);
+  while (rest < small_distance &&  // Negated condition 1
+         unsafe_interval - rest >= ten_kappa &&  // Negated condition 2
+         (rest + ten_kappa < small_distance ||  // buffer{-1} > w_high
+          small_distance - rest >= rest + ten_kappa - small_distance)) {
+    buffer[length - 1]--;
+    rest += ten_kappa;
+  }
+
+  // We have approached w+ as much as possible. We now test if approaching w-
+  // would require changing the buffer. If yes, then we have two possible
+  // representations close to w, but we cannot decide which one is closer.
+  if (rest < big_distance &&
+      unsafe_interval - rest >= ten_kappa &&
+      (rest + ten_kappa < big_distance ||
+       big_distance - rest > rest + ten_kappa - big_distance)) {
+    return false;
+  }
+
+  // Weeding test.
+  //   The safe interval is [too_low + 2 ulp; too_high - 2 ulp]
+  //   Since too_low = too_high - unsafe_interval this is equivalent to
+  //      [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp]
+  //   Conceptually we have: rest ~= too_high - buffer
+  return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit);
+}
+
+
+// Rounds the buffer upwards if the result is closer to v by possibly adding
+// 1 to the buffer. If the precision of the calculation is not sufficient to
+// round correctly, return false.
+// The rounding might shift the whole buffer in which case the kappa is
+// adjusted. For example "99", kappa = 3 might become "10", kappa = 4.
+//
+// If 2*rest > ten_kappa then the buffer needs to be round up.
+// rest can have an error of +/- 1 unit. This function accounts for the
+// imprecision and returns false, if the rounding direction cannot be
+// unambiguously determined.
+//
+// Precondition: rest < ten_kappa.
+static bool RoundWeedCounted(Vector<char> buffer,
+                             int length,
+                             uint64_t rest,
+                             uint64_t ten_kappa,
+                             uint64_t unit,
+                             int* kappa) {
+  ASSERT(rest < ten_kappa);
+  // The following tests are done in a specific order to avoid overflows. They
+  // will work correctly with any uint64 values of rest < ten_kappa and unit.
+  //
+  // If the unit is too big, then we don't know which way to round. For example
+  // a unit of 50 means that the real number lies within rest +/- 50. If
+  // 10^kappa == 40 then there is no way to tell which way to round.
+  if (unit >= ten_kappa) return false;
+  // Even if unit is just half the size of 10^kappa we are already completely
+  // lost. (And after the previous test we know that the expression will not
+  // over/underflow.)
+  if (ten_kappa - unit <= unit) return false;
+  // If 2 * (rest + unit) <= 10^kappa we can safely round down.
+  if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) {
+    return true;
+  }
+  // If 2 * (rest - unit) >= 10^kappa, then we can safely round up.
+  if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) {
+    // Increment the last digit recursively until we find a non '9' digit.
+    buffer[length - 1]++;
+    for (int i = length - 1; i > 0; --i) {
+      if (buffer[i] != '0' + 10) break;
+      buffer[i] = '0';
+      buffer[i - 1]++;
+    }
+    // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the
+    // exception of the first digit all digits are now '0'. Simply switch the
+    // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and
+    // the power (the kappa) is increased.
+    if (buffer[0] == '0' + 10) {
+      buffer[0] = '1';
+      (*kappa) += 1;
+    }
+    return true;
+  }
+  return false;
+}
+
+// Returns the biggest power of ten that is less than or equal to the given
+// number. We furthermore receive the maximum number of bits 'number' has.
+//
+// Returns power == 10^(exponent_plus_one-1) such that
+//    power <= number < power * 10.
+// If number_bits == 0 then 0^(0-1) is returned.
+// The number of bits must be <= 32.
+// Precondition: number < (1 << (number_bits + 1)).
+
+// Inspired by the method for finding an integer log base 10 from here:
+// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+static unsigned int const kSmallPowersOfTen[] =
+    {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+     1000000000};
+
+static void BiggestPowerTen(uint32_t number,
+                            int number_bits,
+                            uint32_t* power,
+                            int* exponent_plus_one) {
+  ASSERT(number < (1u << (number_bits + 1)));
+  // 1233/4096 is approximately 1/lg(10).
+  int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12);
+  // We increment to skip over the first entry in the kPowersOf10 table.
+  // Note: kPowersOf10[i] == 10^(i-1).
+  exponent_plus_one_guess++;
+  // We don't have any guarantees that 2^number_bits <= number.
+  // TODO(floitsch): can we change the 'while' into an 'if'? We definitely see
+  // number < (2^number_bits - 1), but I haven't encountered
+  // number < (2^number_bits - 2) yet.
+  while (number < kSmallPowersOfTen[exponent_plus_one_guess]) {
+    exponent_plus_one_guess--;
+  }
+  *power = kSmallPowersOfTen[exponent_plus_one_guess];
+  *exponent_plus_one = exponent_plus_one_guess;
+}
+
+// Generates the digits of input number w.
+// w is a floating-point number (DiyFp), consisting of a significand and an
+// exponent. Its exponent is bounded by kMinimalTargetExponent and
+// kMaximalTargetExponent.
+//       Hence -60 <= w.e() <= -32.
+//
+// Returns false if it fails, in which case the generated digits in the buffer
+// should not be used.
+// Preconditions:
+//  * low, w and high are correct up to 1 ulp (unit in the last place). That
+//    is, their error must be less than a unit of their last digits.
+//  * low.e() == w.e() == high.e()
+//  * low < w < high, and taking into account their error: low~ <= high~
+//  * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
+// Postconditions: returns false if procedure fails.
+//   otherwise:
+//     * buffer is not null-terminated, but len contains the number of digits.
+//     * buffer contains the shortest possible decimal digit-sequence
+//       such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the
+//       correct values of low and high (without their error).
+//     * if more than one decimal representation gives the minimal number of
+//       decimal digits then the one closest to W (where W is the correct value
+//       of w) is chosen.
+// Remark: this procedure takes into account the imprecision of its input
+//   numbers. If the precision is not enough to guarantee all the postconditions
+//   then false is returned. This usually happens rarely (~0.5%).
+//
+// Say, for the sake of example, that
+//   w.e() == -48, and w.f() == 0x1234567890abcdef
+// w's value can be computed by w.f() * 2^w.e()
+// We can obtain w's integral digits by simply shifting w.f() by -w.e().
+//  -> w's integral part is 0x1234
+//  w's fractional part is therefore 0x567890abcdef.
+// Printing w's integral part is easy (simply print 0x1234 in decimal).
+// In order to print its fraction we repeatedly multiply the fraction by 10 and
+// get each digit. Example the first digit after the point would be computed by
+//   (0x567890abcdef * 10) >> 48. -> 3
+// The whole thing becomes slightly more complicated because we want to stop
+// once we have enough digits. That is, once the digits inside the buffer
+// represent 'w' we can stop. Everything inside the interval low - high
+// represents w. However we have to pay attention to low, high and w's
+// imprecision.
+static bool DigitGen(DiyFp low,
+                     DiyFp w,
+                     DiyFp high,
+                     Vector<char> buffer,
+                     int* length,
+                     int* kappa) {
+  ASSERT(low.e() == w.e() && w.e() == high.e());
+  ASSERT(low.f() + 1 <= high.f() - 1);
+  ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
+  // low, w and high are imprecise, but by less than one ulp (unit in the last
+  // place).
+  // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that
+  // the new numbers are outside of the interval we want the final
+  // representation to lie in.
+  // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield
+  // numbers that are certain to lie in the interval. We will use this fact
+  // later on.
+  // We will now start by generating the digits within the uncertain
+  // interval. Later we will weed out representations that lie outside the safe
+  // interval and thus _might_ lie outside the correct interval.
+  uint64_t unit = 1;
+  DiyFp too_low = DiyFp(low.f() - unit, low.e());
+  DiyFp too_high = DiyFp(high.f() + unit, high.e());
+  // too_low and too_high are guaranteed to lie outside the interval we want the
+  // generated number in.
+  DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low);
+  // We now cut the input number into two parts: the integral digits and the
+  // fractionals. We will not write any decimal separator though, but adapt
+  // kappa instead.
+  // Reminder: we are currently computing the digits (stored inside the buffer)
+  // such that:   too_low < buffer * 10^kappa < too_high
+  // We use too_high for the digit_generation and stop as soon as possible.
+  // If we stop early we effectively round down.
+  DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
+  // Division by one is a shift.
+  uint32_t integrals = static_cast<uint32_t>(too_high.f() >> -one.e());
+  // Modulo by one is an and.
+  uint64_t fractionals = too_high.f() & (one.f() - 1);
+  uint32_t divisor;
+  int divisor_exponent_plus_one;
+  BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
+                  &divisor, &divisor_exponent_plus_one);
+  *kappa = divisor_exponent_plus_one;
+  *length = 0;
+  // Loop invariant: buffer = too_high / 10^kappa  (integer division)
+  // The invariant holds for the first iteration: kappa has been initialized
+  // with the divisor exponent + 1. And the divisor is the biggest power of ten
+  // that is smaller than integrals.
+  while (*kappa > 0) {
+    int digit = integrals / divisor;
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    integrals %= divisor;
+    (*kappa)--;
+    // Note that kappa now equals the exponent of the divisor and that the
+    // invariant thus holds again.
+    uint64_t rest =
+        (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
+    // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e())
+    // Reminder: unsafe_interval.e() == one.e()
+    if (rest < unsafe_interval.f()) {
+      // Rounding down (by not emitting the remaining digits) yields a number
+      // that lies within the unsafe interval.
+      return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(),
+                       unsafe_interval.f(), rest,
+                       static_cast<uint64_t>(divisor) << -one.e(), unit);
+    }
+    divisor /= 10;
+  }
+
+  // The integrals have been generated. We are at the point of the decimal
+  // separator. In the following loop we simply multiply the remaining digits by
+  // 10 and divide by one. We just need to pay attention to multiply associated
+  // data (like the interval or 'unit'), too.
+  // Note that the multiplication by 10 does not overflow, because w.e >= -60
+  // and thus one.e >= -60.
+  ASSERT(one.e() >= -60);
+  ASSERT(fractionals < one.f());
+  ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
+  while (true) {
+    fractionals *= 10;
+    unit *= 10;
+    unsafe_interval.set_f(unsafe_interval.f() * 10);
+    // Integer division by one.
+    int digit = static_cast<int>(fractionals >> -one.e());
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    fractionals &= one.f() - 1;  // Modulo by one.
+    (*kappa)--;
+    if (fractionals < unsafe_interval.f()) {
+      return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit,
+                       unsafe_interval.f(), fractionals, one.f(), unit);
+    }
+  }
+}
+
+
+
+// Generates (at most) requested_digits digits of input number w.
+// w is a floating-point number (DiyFp), consisting of a significand and an
+// exponent. Its exponent is bounded by kMinimalTargetExponent and
+// kMaximalTargetExponent.
+//       Hence -60 <= w.e() <= -32.
+//
+// Returns false if it fails, in which case the generated digits in the buffer
+// should not be used.
+// Preconditions:
+//  * w is correct up to 1 ulp (unit in the last place). That
+//    is, its error must be strictly less than a unit of its last digit.
+//  * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent
+//
+// Postconditions: returns false if procedure fails.
+//   otherwise:
+//     * buffer is not null-terminated, but length contains the number of
+//       digits.
+//     * the representation in buffer is the most precise representation of
+//       requested_digits digits.
+//     * buffer contains at most requested_digits digits of w. If there are less
+//       than requested_digits digits then some trailing '0's have been removed.
+//     * kappa is such that
+//            w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2.
+//
+// Remark: This procedure takes into account the imprecision of its input
+//   numbers. If the precision is not enough to guarantee all the postconditions
+//   then false is returned. This usually happens rarely, but the failure-rate
+//   increases with higher requested_digits.
+static bool DigitGenCounted(DiyFp w,
+                            int requested_digits,
+                            Vector<char> buffer,
+                            int* length,
+                            int* kappa) {
+  ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent);
+  ASSERT(kMinimalTargetExponent >= -60);
+  ASSERT(kMaximalTargetExponent <= -32);
+  // w is assumed to have an error less than 1 unit. Whenever w is scaled we
+  // also scale its error.
+  uint64_t w_error = 1;
+  // We cut the input number into two parts: the integral digits and the
+  // fractional digits. We don't emit any decimal separator, but adapt kappa
+  // instead. Example: instead of writing "1.2" we put "12" into the buffer and
+  // increase kappa by 1.
+  DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());
+  // Division by one is a shift.
+  uint32_t integrals = static_cast<uint32_t>(w.f() >> -one.e());
+  // Modulo by one is an and.
+  uint64_t fractionals = w.f() & (one.f() - 1);
+  uint32_t divisor;
+  int divisor_exponent_plus_one;
+  BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),
+                  &divisor, &divisor_exponent_plus_one);
+  *kappa = divisor_exponent_plus_one;
+  *length = 0;
+
+  // Loop invariant: buffer = w / 10^kappa  (integer division)
+  // The invariant holds for the first iteration: kappa has been initialized
+  // with the divisor exponent + 1. And the divisor is the biggest power of ten
+  // that is smaller than 'integrals'.
+  while (*kappa > 0) {
+    int digit = integrals / divisor;
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    requested_digits--;
+    integrals %= divisor;
+    (*kappa)--;
+    // Note that kappa now equals the exponent of the divisor and that the
+    // invariant thus holds again.
+    if (requested_digits == 0) break;
+    divisor /= 10;
+  }
+
+  if (requested_digits == 0) {
+    uint64_t rest =
+        (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;
+    return RoundWeedCounted(buffer, *length, rest,
+                            static_cast<uint64_t>(divisor) << -one.e(), w_error,
+                            kappa);
+  }
+
+  // The integrals have been generated. We are at the point of the decimal
+  // separator. In the following loop we simply multiply the remaining digits by
+  // 10 and divide by one. We just need to pay attention to multiply associated
+  // data (the 'unit'), too.
+  // Note that the multiplication by 10 does not overflow, because w.e >= -60
+  // and thus one.e >= -60.
+  ASSERT(one.e() >= -60);
+  ASSERT(fractionals < one.f());
+  ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f());
+  while (requested_digits > 0 && fractionals > w_error) {
+    fractionals *= 10;
+    w_error *= 10;
+    // Integer division by one.
+    int digit = static_cast<int>(fractionals >> -one.e());
+    buffer[*length] = '0' + digit;
+    (*length)++;
+    requested_digits--;
+    fractionals &= one.f() - 1;  // Modulo by one.
+    (*kappa)--;
+  }
+  if (requested_digits != 0) return false;
+  return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error,
+                          kappa);
+}
+
+
+// Provides a decimal representation of v.
+// Returns true if it succeeds, otherwise the result cannot be trusted.
+// There will be *length digits inside the buffer (not null-terminated).
+// If the function returns true then
+//        v == (double) (buffer * 10^decimal_exponent).
+// The digits in the buffer are the shortest representation possible: no
+// 0.09999999999999999 instead of 0.1. The shorter representation will even be
+// chosen even if the longer one would be closer to v.
+// The last digit will be closest to the actual v. That is, even if several
+// digits might correctly yield 'v' when read again, the closest will be
+// computed.
+static bool Grisu3(double v,
+                   FastDtoaMode mode,
+                   Vector<char> buffer,
+                   int* length,
+                   int* decimal_exponent) {
+  DiyFp w = Double(v).AsNormalizedDiyFp();
+  // boundary_minus and boundary_plus are the boundaries between v and its
+  // closest floating-point neighbors. Any number strictly between
+  // boundary_minus and boundary_plus will round to v when convert to a double.
+  // Grisu3 will never output representations that lie exactly on a boundary.
+  DiyFp boundary_minus, boundary_plus;
+  if (mode == FAST_DTOA_SHORTEST) {
+    Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
+  } else {
+    ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE);
+    float single_v = static_cast<float>(v);
+    Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
+  }
+  ASSERT(boundary_plus.e() == w.e());
+  DiyFp ten_mk;  // Cached power of ten: 10^-k
+  int mk;        // -k
+  int ten_mk_minimal_binary_exponent =
+     kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  int ten_mk_maximal_binary_exponent =
+     kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+      ten_mk_minimal_binary_exponent,
+      ten_mk_maximal_binary_exponent,
+      &ten_mk, &mk);
+  ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize) &&
+         (kMaximalTargetExponent >= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize));
+  // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
+  // 64 bit significand and ten_mk is thus only precise up to 64 bits.
+
+  // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
+  // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now
+  // off by a small amount.
+  // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
+  // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
+  //           (f-1) * 2^e < w*10^k < (f+1) * 2^e
+  DiyFp scaled_w = DiyFp::Times(w, ten_mk);
+  ASSERT(scaled_w.e() ==
+         boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize);
+  // In theory it would be possible to avoid some recomputations by computing
+  // the difference between w and boundary_minus/plus (a power of 2) and to
+  // compute scaled_boundary_minus/plus by subtracting/adding from
+  // scaled_w. However the code becomes much less readable and the speed
+  // enhancements are not terriffic.
+  DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk);
+  DiyFp scaled_boundary_plus  = DiyFp::Times(boundary_plus,  ten_mk);
+
+  // DigitGen will generate the digits of scaled_w. Therefore we have
+  // v == (double) (scaled_w * 10^-mk).
+  // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an
+  // integer than it will be updated. For instance if scaled_w == 1.23 then
+  // the buffer will be filled with "123" und the decimal_exponent will be
+  // decreased by 2.
+  int kappa;
+  bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus,
+                         buffer, length, &kappa);
+  *decimal_exponent = -mk + kappa;
+  return result;
+}
+
+
+// The "counted" version of grisu3 (see above) only generates requested_digits
+// number of digits. This version does not generate the shortest representation,
+// and with enough requested digits 0.1 will at some point print as 0.9999999...
+// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and
+// therefore the rounding strategy for halfway cases is irrelevant.
+static bool Grisu3Counted(double v,
+                          int requested_digits,
+                          Vector<char> buffer,
+                          int* length,
+                          int* decimal_exponent) {
+  DiyFp w = Double(v).AsNormalizedDiyFp();
+  DiyFp ten_mk;  // Cached power of ten: 10^-k
+  int mk;        // -k
+  int ten_mk_minimal_binary_exponent =
+     kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  int ten_mk_maximal_binary_exponent =
+     kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
+  PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
+      ten_mk_minimal_binary_exponent,
+      ten_mk_maximal_binary_exponent,
+      &ten_mk, &mk);
+  ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize) &&
+         (kMaximalTargetExponent >= w.e() + ten_mk.e() +
+          DiyFp::kSignificandSize));
+  // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
+  // 64 bit significand and ten_mk is thus only precise up to 64 bits.
+
+  // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
+  // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now
+  // off by a small amount.
+  // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
+  // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
+  //           (f-1) * 2^e < w*10^k < (f+1) * 2^e
+  DiyFp scaled_w = DiyFp::Times(w, ten_mk);
+
+  // We now have (double) (scaled_w * 10^-mk).
+  // DigitGen will generate the first requested_digits digits of scaled_w and
+  // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It
+  // will not always be exactly the same since DigitGenCounted only produces a
+  // limited number of digits.)
+  int kappa;
+  bool result = DigitGenCounted(scaled_w, requested_digits,
+                                buffer, length, &kappa);
+  *decimal_exponent = -mk + kappa;
+  return result;
+}
+
+
+bool FastDtoa(double v,
+              FastDtoaMode mode,
+              int requested_digits,
+              Vector<char> buffer,
+              int* length,
+              int* decimal_point) {
+  ASSERT(v > 0);
+  ASSERT(!Double(v).IsSpecial());
+
+  bool result = false;
+  int decimal_exponent = 0;
+  switch (mode) {
+    case FAST_DTOA_SHORTEST:
+    case FAST_DTOA_SHORTEST_SINGLE:
+      result = Grisu3(v, mode, buffer, length, &decimal_exponent);
+      break;
+    case FAST_DTOA_PRECISION:
+      result = Grisu3Counted(v, requested_digits,
+                             buffer, length, &decimal_exponent);
+      break;
+    default:
+      UNREACHABLE();
+  }
+  if (result) {
+    *decimal_point = *length + decimal_exponent;
+    buffer[*length] = '\0';
+  }
+  return result;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/fast-dtoa.h b/klm/util/double-conversion/fast-dtoa.h
new file mode 100644
index 00000000..5f1e8eee
--- /dev/null
+++ b/klm/util/double-conversion/fast-dtoa.h
@@ -0,0 +1,88 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_
+#define DOUBLE_CONVERSION_FAST_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+enum FastDtoaMode {
+  // Computes the shortest representation of the given input. The returned
+  // result will be the most accurate number of this length. Longer
+  // representations might be more accurate.
+  FAST_DTOA_SHORTEST,
+  // Same as FAST_DTOA_SHORTEST but for single-precision floats.
+  FAST_DTOA_SHORTEST_SINGLE,
+  // Computes a representation where the precision (number of digits) is
+  // given as input. The precision is independent of the decimal point.
+  FAST_DTOA_PRECISION
+};
+
+// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not
+// include the terminating '\0' character.
+static const int kFastDtoaMaximalLength = 17;
+// Same for single-precision numbers.
+static const int kFastDtoaMaximalSingleLength = 9;
+
+// Provides a decimal representation of v.
+// The result should be interpreted as buffer * 10^(point - length).
+//
+// Precondition:
+//   * v must be a strictly positive finite double.
+//
+// Returns true if it succeeds, otherwise the result can not be trusted.
+// There will be *length digits inside the buffer followed by a null terminator.
+// If the function returns true and mode equals
+//   - FAST_DTOA_SHORTEST, then
+//     the parameter requested_digits is ignored.
+//     The result satisfies
+//         v == (double) (buffer * 10^(point - length)).
+//     The digits in the buffer are the shortest representation possible. E.g.
+//     if 0.099999999999 and 0.1 represent the same double then "1" is returned
+//     with point = 0.
+//     The last digit will be closest to the actual v. That is, even if several
+//     digits might correctly yield 'v' when read again, the buffer will contain
+//     the one closest to v.
+//   - FAST_DTOA_PRECISION, then
+//     the buffer contains requested_digits digits.
+//     the difference v - (buffer * 10^(point-length)) is closest to zero for
+//     all possible representations of requested_digits digits.
+//     If there are two values that are equally close, then FastDtoa returns
+//     false.
+// For both modes the buffer must be large enough to hold the result.
+bool FastDtoa(double d,
+              FastDtoaMode mode,
+              int requested_digits,
+              Vector<char> buffer,
+              int* length,
+              int* decimal_point);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_FAST_DTOA_H_
diff --git a/klm/util/double-conversion/fixed-dtoa.cc b/klm/util/double-conversion/fixed-dtoa.cc
new file mode 100644
index 00000000..d56b1449
--- /dev/null
+++ b/klm/util/double-conversion/fixed-dtoa.cc
@@ -0,0 +1,402 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <math.h>
+
+#include "fixed-dtoa.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// Represents a 128bit type. This class should be replaced by a native type on
+// platforms that support 128bit integers.
+class UInt128 {
+ public:
+  UInt128() : high_bits_(0), low_bits_(0) { }
+  UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { }
+
+  void Multiply(uint32_t multiplicand) {
+    uint64_t accumulator;
+
+    accumulator = (low_bits_ & kMask32) * multiplicand;
+    uint32_t part = static_cast<uint32_t>(accumulator & kMask32);
+    accumulator >>= 32;
+    accumulator = accumulator + (low_bits_ >> 32) * multiplicand;
+    low_bits_ = (accumulator << 32) + part;
+    accumulator >>= 32;
+    accumulator = accumulator + (high_bits_ & kMask32) * multiplicand;
+    part = static_cast<uint32_t>(accumulator & kMask32);
+    accumulator >>= 32;
+    accumulator = accumulator + (high_bits_ >> 32) * multiplicand;
+    high_bits_ = (accumulator << 32) + part;
+    ASSERT((accumulator >> 32) == 0);
+  }
+
+  void Shift(int shift_amount) {
+    ASSERT(-64 <= shift_amount && shift_amount <= 64);
+    if (shift_amount == 0) {
+      return;
+    } else if (shift_amount == -64) {
+      high_bits_ = low_bits_;
+      low_bits_ = 0;
+    } else if (shift_amount == 64) {
+      low_bits_ = high_bits_;
+      high_bits_ = 0;
+    } else if (shift_amount <= 0) {
+      high_bits_ <<= -shift_amount;
+      high_bits_ += low_bits_ >> (64 + shift_amount);
+      low_bits_ <<= -shift_amount;
+    } else {
+      low_bits_ >>= shift_amount;
+      low_bits_ += high_bits_ << (64 - shift_amount);
+      high_bits_ >>= shift_amount;
+    }
+  }
+
+  // Modifies *this to *this MOD (2^power).
+  // Returns *this DIV (2^power).
+  int DivModPowerOf2(int power) {
+    if (power >= 64) {
+      int result = static_cast<int>(high_bits_ >> (power - 64));
+      high_bits_ -= static_cast<uint64_t>(result) << (power - 64);
+      return result;
+    } else {
+      uint64_t part_low = low_bits_ >> power;
+      uint64_t part_high = high_bits_ << (64 - power);
+      int result = static_cast<int>(part_low + part_high);
+      high_bits_ = 0;
+      low_bits_ -= part_low << power;
+      return result;
+    }
+  }
+
+  bool IsZero() const {
+    return high_bits_ == 0 && low_bits_ == 0;
+  }
+
+  int BitAt(int position) {
+    if (position >= 64) {
+      return static_cast<int>(high_bits_ >> (position - 64)) & 1;
+    } else {
+      return static_cast<int>(low_bits_ >> position) & 1;
+    }
+  }
+
+ private:
+  static const uint64_t kMask32 = 0xFFFFFFFF;
+  // Value == (high_bits_ << 64) + low_bits_
+  uint64_t high_bits_;
+  uint64_t low_bits_;
+};
+
+
+static const int kDoubleSignificandSize = 53;  // Includes the hidden bit.
+
+
+static void FillDigits32FixedLength(uint32_t number, int requested_length,
+                                    Vector<char> buffer, int* length) {
+  for (int i = requested_length - 1; i >= 0; --i) {
+    buffer[(*length) + i] = '0' + number % 10;
+    number /= 10;
+  }
+  *length += requested_length;
+}
+
+
+static void FillDigits32(uint32_t number, Vector<char> buffer, int* length) {
+  int number_length = 0;
+  // We fill the digits in reverse order and exchange them afterwards.
+  while (number != 0) {
+    int digit = number % 10;
+    number /= 10;
+    buffer[(*length) + number_length] = '0' + digit;
+    number_length++;
+  }
+  // Exchange the digits.
+  int i = *length;
+  int j = *length + number_length - 1;
+  while (i < j) {
+    char tmp = buffer[i];
+    buffer[i] = buffer[j];
+    buffer[j] = tmp;
+    i++;
+    j--;
+  }
+  *length += number_length;
+}
+
+
+static void FillDigits64FixedLength(uint64_t number, int requested_length,
+                                    Vector<char> buffer, int* length) {
+  const uint32_t kTen7 = 10000000;
+  // For efficiency cut the number into 3 uint32_t parts, and print those.
+  uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+  number /= kTen7;
+  uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+  uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+  FillDigits32FixedLength(part0, 3, buffer, length);
+  FillDigits32FixedLength(part1, 7, buffer, length);
+  FillDigits32FixedLength(part2, 7, buffer, length);
+}
+
+
+static void FillDigits64(uint64_t number, Vector<char> buffer, int* length) {
+  const uint32_t kTen7 = 10000000;
+  // For efficiency cut the number into 3 uint32_t parts, and print those.
+  uint32_t part2 = static_cast<uint32_t>(number % kTen7);
+  number /= kTen7;
+  uint32_t part1 = static_cast<uint32_t>(number % kTen7);
+  uint32_t part0 = static_cast<uint32_t>(number / kTen7);
+
+  if (part0 != 0) {
+    FillDigits32(part0, buffer, length);
+    FillDigits32FixedLength(part1, 7, buffer, length);
+    FillDigits32FixedLength(part2, 7, buffer, length);
+  } else if (part1 != 0) {
+    FillDigits32(part1, buffer, length);
+    FillDigits32FixedLength(part2, 7, buffer, length);
+  } else {
+    FillDigits32(part2, buffer, length);
+  }
+}
+
+
+static void RoundUp(Vector<char> buffer, int* length, int* decimal_point) {
+  // An empty buffer represents 0.
+  if (*length == 0) {
+    buffer[0] = '1';
+    *decimal_point = 1;
+    *length = 1;
+    return;
+  }
+  // Round the last digit until we either have a digit that was not '9' or until
+  // we reached the first digit.
+  buffer[(*length) - 1]++;
+  for (int i = (*length) - 1; i > 0; --i) {
+    if (buffer[i] != '0' + 10) {
+      return;
+    }
+    buffer[i] = '0';
+    buffer[i - 1]++;
+  }
+  // If the first digit is now '0' + 10, we would need to set it to '0' and add
+  // a '1' in front. However we reach the first digit only if all following
+  // digits had been '9' before rounding up. Now all trailing digits are '0' and
+  // we simply switch the first digit to '1' and update the decimal-point
+  // (indicating that the point is now one digit to the right).
+  if (buffer[0] == '0' + 10) {
+    buffer[0] = '1';
+    (*decimal_point)++;
+  }
+}
+
+
+// The given fractionals number represents a fixed-point number with binary
+// point at bit (-exponent).
+// Preconditions:
+//   -128 <= exponent <= 0.
+//   0 <= fractionals * 2^exponent < 1
+//   The buffer holds the result.
+// The function will round its result. During the rounding-process digits not
+// generated by this function might be updated, and the decimal-point variable
+// might be updated. If this function generates the digits 99 and the buffer
+// already contained "199" (thus yielding a buffer of "19999") then a
+// rounding-up will change the contents of the buffer to "20000".
+static void FillFractionals(uint64_t fractionals, int exponent,
+                            int fractional_count, Vector<char> buffer,
+                            int* length, int* decimal_point) {
+  ASSERT(-128 <= exponent && exponent <= 0);
+  // 'fractionals' is a fixed-point number, with binary point at bit
+  // (-exponent). Inside the function the non-converted remainder of fractionals
+  // is a fixed-point number, with binary point at bit 'point'.
+  if (-exponent <= 64) {
+    // One 64 bit number is sufficient.
+    ASSERT(fractionals >> 56 == 0);
+    int point = -exponent;
+    for (int i = 0; i < fractional_count; ++i) {
+      if (fractionals == 0) break;
+      // Instead of multiplying by 10 we multiply by 5 and adjust the point
+      // location. This way the fractionals variable will not overflow.
+      // Invariant at the beginning of the loop: fractionals < 2^point.
+      // Initially we have: point <= 64 and fractionals < 2^56
+      // After each iteration the point is decremented by one.
+      // Note that 5^3 = 125 < 128 = 2^7.
+      // Therefore three iterations of this loop will not overflow fractionals
+      // (even without the subtraction at the end of the loop body). At this
+      // time point will satisfy point <= 61 and therefore fractionals < 2^point
+      // and any further multiplication of fractionals by 5 will not overflow.
+      fractionals *= 5;
+      point--;
+      int digit = static_cast<int>(fractionals >> point);
+      buffer[*length] = '0' + digit;
+      (*length)++;
+      fractionals -= static_cast<uint64_t>(digit) << point;
+    }
+    // If the first bit after the point is set we have to round up.
+    if (((fractionals >> (point - 1)) & 1) == 1) {
+      RoundUp(buffer, length, decimal_point);
+    }
+  } else {  // We need 128 bits.
+    ASSERT(64 < -exponent && -exponent <= 128);
+    UInt128 fractionals128 = UInt128(fractionals, 0);
+    fractionals128.Shift(-exponent - 64);
+    int point = 128;
+    for (int i = 0; i < fractional_count; ++i) {
+      if (fractionals128.IsZero()) break;
+      // As before: instead of multiplying by 10 we multiply by 5 and adjust the
+      // point location.
+      // This multiplication will not overflow for the same reasons as before.
+      fractionals128.Multiply(5);
+      point--;
+      int digit = fractionals128.DivModPowerOf2(point);
+      buffer[*length] = '0' + digit;
+      (*length)++;
+    }
+    if (fractionals128.BitAt(point - 1) == 1) {
+      RoundUp(buffer, length, decimal_point);
+    }
+  }
+}
+
+
+// Removes leading and trailing zeros.
+// If leading zeros are removed then the decimal point position is adjusted.
+static void TrimZeros(Vector<char> buffer, int* length, int* decimal_point) {
+  while (*length > 0 && buffer[(*length) - 1] == '0') {
+    (*length)--;
+  }
+  int first_non_zero = 0;
+  while (first_non_zero < *length && buffer[first_non_zero] == '0') {
+    first_non_zero++;
+  }
+  if (first_non_zero != 0) {
+    for (int i = first_non_zero; i < *length; ++i) {
+      buffer[i - first_non_zero] = buffer[i];
+    }
+    *length -= first_non_zero;
+    *decimal_point -= first_non_zero;
+  }
+}
+
+
+bool FastFixedDtoa(double v,
+                   int fractional_count,
+                   Vector<char> buffer,
+                   int* length,
+                   int* decimal_point) {
+  const uint32_t kMaxUInt32 = 0xFFFFFFFF;
+  uint64_t significand = Double(v).Significand();
+  int exponent = Double(v).Exponent();
+  // v = significand * 2^exponent (with significand a 53bit integer).
+  // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we
+  // don't know how to compute the representation. 2^73 ~= 9.5*10^21.
+  // If necessary this limit could probably be increased, but we don't need
+  // more.
+  if (exponent > 20) return false;
+  if (fractional_count > 20) return false;
+  *length = 0;
+  // At most kDoubleSignificandSize bits of the significand are non-zero.
+  // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero
+  // bits:  0..11*..0xxx..53*..xx
+  if (exponent + kDoubleSignificandSize > 64) {
+    // The exponent must be > 11.
+    //
+    // We know that v = significand * 2^exponent.
+    // And the exponent > 11.
+    // We simplify the task by dividing v by 10^17.
+    // The quotient delivers the first digits, and the remainder fits into a 64
+    // bit number.
+    // Dividing by 10^17 is equivalent to dividing by 5^17*2^17.
+    const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5);  // 5^17
+    uint64_t divisor = kFive17;
+    int divisor_power = 17;
+    uint64_t dividend = significand;
+    uint32_t quotient;
+    uint64_t remainder;
+    // Let v = f * 2^e with f == significand and e == exponent.
+    // Then need q (quotient) and r (remainder) as follows:
+    //   v            = q * 10^17       + r
+    //   f * 2^e      = q * 10^17       + r
+    //   f * 2^e      = q * 5^17 * 2^17 + r
+    // If e > 17 then
+    //   f * 2^(e-17) = q * 5^17        + r/2^17
+    // else
+    //   f  = q * 5^17 * 2^(17-e) + r/2^e
+    if (exponent > divisor_power) {
+      // We only allow exponents of up to 20 and therefore (17 - e) <= 3
+      dividend <<= exponent - divisor_power;
+      quotient = static_cast<uint32_t>(dividend / divisor);
+      remainder = (dividend % divisor) << divisor_power;
+    } else {
+      divisor <<= divisor_power - exponent;
+      quotient = static_cast<uint32_t>(dividend / divisor);
+      remainder = (dividend % divisor) << exponent;
+    }
+    FillDigits32(quotient, buffer, length);
+    FillDigits64FixedLength(remainder, divisor_power, buffer, length);
+    *decimal_point = *length;
+  } else if (exponent >= 0) {
+    // 0 <= exponent <= 11
+    significand <<= exponent;
+    FillDigits64(significand, buffer, length);
+    *decimal_point = *length;
+  } else if (exponent > -kDoubleSignificandSize) {
+    // We have to cut the number.
+    uint64_t integrals = significand >> -exponent;
+    uint64_t fractionals = significand - (integrals << -exponent);
+    if (integrals > kMaxUInt32) {
+      FillDigits64(integrals, buffer, length);
+    } else {
+      FillDigits32(static_cast<uint32_t>(integrals), buffer, length);
+    }
+    *decimal_point = *length;
+    FillFractionals(fractionals, exponent, fractional_count,
+                    buffer, length, decimal_point);
+  } else if (exponent < -128) {
+    // This configuration (with at most 20 digits) means that all digits must be
+    // 0.
+    ASSERT(fractional_count <= 20);
+    buffer[0] = '\0';
+    *length = 0;
+    *decimal_point = -fractional_count;
+  } else {
+    *decimal_point = 0;
+    FillFractionals(significand, exponent, fractional_count,
+                    buffer, length, decimal_point);
+  }
+  TrimZeros(buffer, length, decimal_point);
+  buffer[*length] = '\0';
+  if ((*length) == 0) {
+    // The string is empty and the decimal_point thus has no importance. Mimick
+    // Gay's dtoa and and set it to -fractional_count.
+    *decimal_point = -fractional_count;
+  }
+  return true;
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/fixed-dtoa.h b/klm/util/double-conversion/fixed-dtoa.h
new file mode 100644
index 00000000..3bdd08e2
--- /dev/null
+++ b/klm/util/double-conversion/fixed-dtoa.h
@@ -0,0 +1,56 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_
+#define DOUBLE_CONVERSION_FIXED_DTOA_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// Produces digits necessary to print a given number with
+// 'fractional_count' digits after the decimal point.
+// The buffer must be big enough to hold the result plus one terminating null
+// character.
+//
+// The produced digits might be too short in which case the caller has to fill
+// the gaps with '0's.
+// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and
+// decimal_point = -2.
+// Halfway cases are rounded towards +/-Infinity (away from 0). The call
+// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0.
+// The returned buffer may contain digits that would be truncated from the
+// shortest representation of the input.
+//
+// This method only works for some parameters. If it can't handle the input it
+// returns false. The output is null-terminated when the function succeeds.
+bool FastFixedDtoa(double v, int fractional_count,
+                   Vector<char> buffer, int* length, int* decimal_point);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_FIXED_DTOA_H_
diff --git a/klm/util/double-conversion/ieee.h b/klm/util/double-conversion/ieee.h
new file mode 100644
index 00000000..839dc47d
--- /dev/null
+++ b/klm/util/double-conversion/ieee.h
@@ -0,0 +1,398 @@
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_DOUBLE_H_
+#define DOUBLE_CONVERSION_DOUBLE_H_
+
+#include "diy-fp.h"
+
+namespace double_conversion {
+
+// We assume that doubles and uint64_t have the same endianness.
+static uint64_t double_to_uint64(double d) { return BitCast<uint64_t>(d); }
+static double uint64_to_double(uint64_t d64) { return BitCast<double>(d64); }
+static uint32_t float_to_uint32(float f) { return BitCast<uint32_t>(f); }
+static float uint32_to_float(uint32_t d32) { return BitCast<float>(d32); }
+
+// Helper functions for doubles.
+class Double {
+ public:
+  static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000);
+  static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000);
+  static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF);
+  static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000);
+  static const int kPhysicalSignificandSize = 52;  // Excludes the hidden bit.
+  static const int kSignificandSize = 53;
+
+  Double() : d64_(0) {}
+  explicit Double(double d) : d64_(double_to_uint64(d)) {}
+  explicit Double(uint64_t d64) : d64_(d64) {}
+  explicit Double(DiyFp diy_fp)
+    : d64_(DiyFpToUint64(diy_fp)) {}
+
+  // The value encoded by this Double must be greater or equal to +0.0.
+  // It must not be special (infinity, or NaN).
+  DiyFp AsDiyFp() const {
+    ASSERT(Sign() > 0);
+    ASSERT(!IsSpecial());
+    return DiyFp(Significand(), Exponent());
+  }
+
+  // The value encoded by this Double must be strictly greater than 0.
+  DiyFp AsNormalizedDiyFp() const {
+    ASSERT(value() > 0.0);
+    uint64_t f = Significand();
+    int e = Exponent();
+
+    // The current double could be a denormal.
+    while ((f & kHiddenBit) == 0) {
+      f <<= 1;
+      e--;
+    }
+    // Do the final shifts in one go.
+    f <<= DiyFp::kSignificandSize - kSignificandSize;
+    e -= DiyFp::kSignificandSize - kSignificandSize;
+    return DiyFp(f, e);
+  }
+
+  // Returns the double's bit as uint64.
+  uint64_t AsUint64() const {
+    return d64_;
+  }
+
+  // Returns the next greater double. Returns +infinity on input +infinity.
+  double NextDouble() const {
+    if (d64_ == kInfinity) return Double(kInfinity).value();
+    if (Sign() < 0 && Significand() == 0) {
+      // -0.0
+      return 0.0;
+    }
+    if (Sign() < 0) {
+      return Double(d64_ - 1).value();
+    } else {
+      return Double(d64_ + 1).value();
+    }
+  }
+
+  double PreviousDouble() const {
+    if (d64_ == (kInfinity | kSignMask)) return -Double::Infinity();
+    if (Sign() < 0) {
+      return Double(d64_ + 1).value();
+    } else {
+      if (Significand() == 0) return -0.0;
+      return Double(d64_ - 1).value();
+    }
+  }
+
+  int Exponent() const {
+    if (IsDenormal()) return kDenormalExponent;
+
+    uint64_t d64 = AsUint64();
+    int biased_e =
+        static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize);
+    return biased_e - kExponentBias;
+  }
+
+  uint64_t Significand() const {
+    uint64_t d64 = AsUint64();
+    uint64_t significand = d64 & kSignificandMask;
+    if (!IsDenormal()) {
+      return significand + kHiddenBit;
+    } else {
+      return significand;
+    }
+  }
+
+  // Returns true if the double is a denormal.
+  bool IsDenormal() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kExponentMask) == 0;
+  }
+
+  // We consider denormals not to be special.
+  // Hence only Infinity and NaN are special.
+  bool IsSpecial() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kExponentMask) == kExponentMask;
+  }
+
+  bool IsNan() const {
+    uint64_t d64 = AsUint64();
+    return ((d64 & kExponentMask) == kExponentMask) &&
+        ((d64 & kSignificandMask) != 0);
+  }
+
+  bool IsInfinite() const {
+    uint64_t d64 = AsUint64();
+    return ((d64 & kExponentMask) == kExponentMask) &&
+        ((d64 & kSignificandMask) == 0);
+  }
+
+  int Sign() const {
+    uint64_t d64 = AsUint64();
+    return (d64 & kSignMask) == 0? 1: -1;
+  }
+
+  // Precondition: the value encoded by this Double must be greater or equal
+  // than +0.0.
+  DiyFp UpperBoundary() const {
+    ASSERT(Sign() > 0);
+    return DiyFp(Significand() * 2 + 1, Exponent() - 1);
+  }
+
+  // Computes the two boundaries of this.
+  // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+  // exponent as m_plus.
+  // Precondition: the value encoded by this Double must be greater than 0.
+  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+    ASSERT(value() > 0.0);
+    DiyFp v = this->AsDiyFp();
+    DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+    DiyFp m_minus;
+    if (LowerBoundaryIsCloser()) {
+      m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+    } else {
+      m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+    }
+    m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
+    m_minus.set_e(m_plus.e());
+    *out_m_plus = m_plus;
+    *out_m_minus = m_minus;
+  }
+
+  bool LowerBoundaryIsCloser() const {
+    // The boundary is closer if the significand is of the form f == 2^p-1 then
+    // the lower boundary is closer.
+    // Think of v = 1000e10 and v- = 9999e9.
+    // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+    // at a distance of 1e8.
+    // The only exception is for the smallest normal: the largest denormal is
+    // at the same distance as its successor.
+    // Note: denormals have the same exponent as the smallest normals.
+    bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0);
+    return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+  }
+
+  double value() const { return uint64_to_double(d64_); }
+
+  // Returns the significand size for a given order of magnitude.
+  // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
+  // This function returns the number of significant binary digits v will have
+  // once it's encoded into a double. In almost all cases this is equal to
+  // kSignificandSize. The only exceptions are denormals. They start with
+  // leading zeroes and their effective significand-size is hence smaller.
+  static int SignificandSizeForOrderOfMagnitude(int order) {
+    if (order >= (kDenormalExponent + kSignificandSize)) {
+      return kSignificandSize;
+    }
+    if (order <= kDenormalExponent) return 0;
+    return order - kDenormalExponent;
+  }
+
+  static double Infinity() {
+    return Double(kInfinity).value();
+  }
+
+  static double NaN() {
+    return Double(kNaN).value();
+  }
+
+ private:
+  static const int kExponentBias = 0x3FF + kPhysicalSignificandSize;
+  static const int kDenormalExponent = -kExponentBias + 1;
+  static const int kMaxExponent = 0x7FF - kExponentBias;
+  static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000);
+  static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000);
+
+  const uint64_t d64_;
+
+  static uint64_t DiyFpToUint64(DiyFp diy_fp) {
+    uint64_t significand = diy_fp.f();
+    int exponent = diy_fp.e();
+    while (significand > kHiddenBit + kSignificandMask) {
+      significand >>= 1;
+      exponent++;
+    }
+    if (exponent >= kMaxExponent) {
+      return kInfinity;
+    }
+    if (exponent < kDenormalExponent) {
+      return 0;
+    }
+    while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) {
+      significand <<= 1;
+      exponent--;
+    }
+    uint64_t biased_exponent;
+    if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) {
+      biased_exponent = 0;
+    } else {
+      biased_exponent = static_cast<uint64_t>(exponent + kExponentBias);
+    }
+    return (significand & kSignificandMask) |
+        (biased_exponent << kPhysicalSignificandSize);
+  }
+};
+
+class Single {
+ public:
+  static const uint32_t kSignMask = 0x80000000;
+  static const uint32_t kExponentMask = 0x7F800000;
+  static const uint32_t kSignificandMask = 0x007FFFFF;
+  static const uint32_t kHiddenBit = 0x00800000;
+  static const int kPhysicalSignificandSize = 23;  // Excludes the hidden bit.
+  static const int kSignificandSize = 24;
+
+  Single() : d32_(0) {}
+  explicit Single(float f) : d32_(float_to_uint32(f)) {}
+  explicit Single(uint32_t d32) : d32_(d32) {}
+
+  // The value encoded by this Single must be greater or equal to +0.0.
+  // It must not be special (infinity, or NaN).
+  DiyFp AsDiyFp() const {
+    ASSERT(Sign() > 0);
+    ASSERT(!IsSpecial());
+    return DiyFp(Significand(), Exponent());
+  }
+
+  // Returns the single's bit as uint64.
+  uint32_t AsUint32() const {
+    return d32_;
+  }
+
+  int Exponent() const {
+    if (IsDenormal()) return kDenormalExponent;
+
+    uint32_t d32 = AsUint32();
+    int biased_e =
+        static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize);
+    return biased_e - kExponentBias;
+  }
+
+  uint32_t Significand() const {
+    uint32_t d32 = AsUint32();
+    uint32_t significand = d32 & kSignificandMask;
+    if (!IsDenormal()) {
+      return significand + kHiddenBit;
+    } else {
+      return significand;
+    }
+  }
+
+  // Returns true if the single is a denormal.
+  bool IsDenormal() const {
+    uint32_t d32 = AsUint32();
+    return (d32 & kExponentMask) == 0;
+  }
+
+  // We consider denormals not to be special.
+  // Hence only Infinity and NaN are special.
+  bool IsSpecial() const {
+    uint32_t d32 = AsUint32();
+    return (d32 & kExponentMask) == kExponentMask;
+  }
+
+  bool IsNan() const {
+    uint32_t d32 = AsUint32();
+    return ((d32 & kExponentMask) == kExponentMask) &&
+        ((d32 & kSignificandMask) != 0);
+  }
+
+  bool IsInfinite() const {
+    uint32_t d32 = AsUint32();
+    return ((d32 & kExponentMask) == kExponentMask) &&
+        ((d32 & kSignificandMask) == 0);
+  }
+
+  int Sign() const {
+    uint32_t d32 = AsUint32();
+    return (d32 & kSignMask) == 0? 1: -1;
+  }
+
+  // Computes the two boundaries of this.
+  // The bigger boundary (m_plus) is normalized. The lower boundary has the same
+  // exponent as m_plus.
+  // Precondition: the value encoded by this Single must be greater than 0.
+  void NormalizedBoundaries(DiyFp* out_m_minus, DiyFp* out_m_plus) const {
+    ASSERT(value() > 0.0);
+    DiyFp v = this->AsDiyFp();
+    DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1));
+    DiyFp m_minus;
+    if (LowerBoundaryIsCloser()) {
+      m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2);
+    } else {
+      m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1);
+    }
+    m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e()));
+    m_minus.set_e(m_plus.e());
+    *out_m_plus = m_plus;
+    *out_m_minus = m_minus;
+  }
+
+  // Precondition: the value encoded by this Single must be greater or equal
+  // than +0.0.
+  DiyFp UpperBoundary() const {
+    ASSERT(Sign() > 0);
+    return DiyFp(Significand() * 2 + 1, Exponent() - 1);
+  }
+
+  bool LowerBoundaryIsCloser() const {
+    // The boundary is closer if the significand is of the form f == 2^p-1 then
+    // the lower boundary is closer.
+    // Think of v = 1000e10 and v- = 9999e9.
+    // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but
+    // at a distance of 1e8.
+    // The only exception is for the smallest normal: the largest denormal is
+    // at the same distance as its successor.
+    // Note: denormals have the same exponent as the smallest normals.
+    bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0);
+    return physical_significand_is_zero && (Exponent() != kDenormalExponent);
+  }
+
+  float value() const { return uint32_to_float(d32_); }
+
+  static float Infinity() {
+    return Single(kInfinity).value();
+  }
+
+  static float NaN() {
+    return Single(kNaN).value();
+  }
+
+ private:
+  static const int kExponentBias = 0x7F + kPhysicalSignificandSize;
+  static const int kDenormalExponent = -kExponentBias + 1;
+  static const int kMaxExponent = 0xFF - kExponentBias;
+  static const uint32_t kInfinity = 0x7F800000;
+  static const uint32_t kNaN = 0x7FC00000;
+
+  const uint32_t d32_;
+};
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_DOUBLE_H_
diff --git a/klm/util/double-conversion/strtod.cc b/klm/util/double-conversion/strtod.cc
new file mode 100644
index 00000000..9758989f
--- /dev/null
+++ b/klm/util/double-conversion/strtod.cc
@@ -0,0 +1,554 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdarg.h>
+#include <limits.h>
+
+#include "strtod.h"
+#include "bignum.h"
+#include "cached-powers.h"
+#include "ieee.h"
+
+namespace double_conversion {
+
+// 2^53 = 9007199254740992.
+// Any integer with at most 15 decimal digits will hence fit into a double
+// (which has a 53bit significand) without loss of precision.
+static const int kMaxExactDoubleIntegerDecimalDigits = 15;
+// 2^64 = 18446744073709551616 > 10^19
+static const int kMaxUint64DecimalDigits = 19;
+
+// Max double: 1.7976931348623157 x 10^308
+// Min non-zero double: 4.9406564584124654 x 10^-324
+// Any x >= 10^309 is interpreted as +infinity.
+// Any x <= 10^-324 is interpreted as 0.
+// Note that 2.5e-324 (despite being smaller than the min double) will be read
+// as non-zero (equal to the min non-zero double).
+static const int kMaxDecimalPower = 309;
+static const int kMinDecimalPower = -324;
+
+// 2^64 = 18446744073709551616
+static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
+
+
+static const double exact_powers_of_ten[] = {
+  1.0,  // 10^0
+  10.0,
+  100.0,
+  1000.0,
+  10000.0,
+  100000.0,
+  1000000.0,
+  10000000.0,
+  100000000.0,
+  1000000000.0,
+  10000000000.0,  // 10^10
+  100000000000.0,
+  1000000000000.0,
+  10000000000000.0,
+  100000000000000.0,
+  1000000000000000.0,
+  10000000000000000.0,
+  100000000000000000.0,
+  1000000000000000000.0,
+  10000000000000000000.0,
+  100000000000000000000.0,  // 10^20
+  1000000000000000000000.0,
+  // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
+  10000000000000000000000.0
+};
+static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
+
+// Maximum number of significant digits in the decimal representation.
+// In fact the value is 772 (see conversions.cc), but to give us some margin
+// we round up to 780.
+static const int kMaxSignificantDecimalDigits = 780;
+
+static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
+  for (int i = 0; i < buffer.length(); i++) {
+    if (buffer[i] != '0') {
+      return buffer.SubVector(i, buffer.length());
+    }
+  }
+  return Vector<const char>(buffer.start(), 0);
+}
+
+
+static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
+  for (int i = buffer.length() - 1; i >= 0; --i) {
+    if (buffer[i] != '0') {
+      return buffer.SubVector(0, i + 1);
+    }
+  }
+  return Vector<const char>(buffer.start(), 0);
+}
+
+
+static void CutToMaxSignificantDigits(Vector<const char> buffer,
+                                       int exponent,
+                                       char* significant_buffer,
+                                       int* significant_exponent) {
+  for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
+    significant_buffer[i] = buffer[i];
+  }
+  // The input buffer has been trimmed. Therefore the last digit must be
+  // different from '0'.
+  ASSERT(buffer[buffer.length() - 1] != '0');
+  // Set the last digit to be non-zero. This is sufficient to guarantee
+  // correct rounding.
+  significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
+  *significant_exponent =
+      exponent + (buffer.length() - kMaxSignificantDecimalDigits);
+}
+
+
+// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits.
+// If possible the input-buffer is reused, but if the buffer needs to be
+// modified (due to cutting), then the input needs to be copied into the
+// buffer_copy_space.
+static void TrimAndCut(Vector<const char> buffer, int exponent,
+                       char* buffer_copy_space, int space_size,
+                       Vector<const char>* trimmed, int* updated_exponent) {
+  Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
+  Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
+  exponent += left_trimmed.length() - right_trimmed.length();
+  if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
+    ASSERT(space_size >= kMaxSignificantDecimalDigits);
+    CutToMaxSignificantDigits(right_trimmed, exponent,
+                              buffer_copy_space, updated_exponent);
+    *trimmed = Vector<const char>(buffer_copy_space,
+                                 kMaxSignificantDecimalDigits);
+  } else {
+    *trimmed = right_trimmed;
+    *updated_exponent = exponent;
+  }
+}
+
+
+// Reads digits from the buffer and converts them to a uint64.
+// Reads in as many digits as fit into a uint64.
+// When the string starts with "1844674407370955161" no further digit is read.
+// Since 2^64 = 18446744073709551616 it would still be possible read another
+// digit if it was less or equal than 6, but this would complicate the code.
+static uint64_t ReadUint64(Vector<const char> buffer,
+                           int* number_of_read_digits) {
+  uint64_t result = 0;
+  int i = 0;
+  while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
+    int digit = buffer[i++] - '0';
+    ASSERT(0 <= digit && digit <= 9);
+    result = 10 * result + digit;
+  }
+  *number_of_read_digits = i;
+  return result;
+}
+
+
+// Reads a DiyFp from the buffer.
+// The returned DiyFp is not necessarily normalized.
+// If remaining_decimals is zero then the returned DiyFp is accurate.
+// Otherwise it has been rounded and has error of at most 1/2 ulp.
+static void ReadDiyFp(Vector<const char> buffer,
+                      DiyFp* result,
+                      int* remaining_decimals) {
+  int read_digits;
+  uint64_t significand = ReadUint64(buffer, &read_digits);
+  if (buffer.length() == read_digits) {
+    *result = DiyFp(significand, 0);
+    *remaining_decimals = 0;
+  } else {
+    // Round the significand.
+    if (buffer[read_digits] >= '5') {
+      significand++;
+    }
+    // Compute the binary exponent.
+    int exponent = 0;
+    *result = DiyFp(significand, exponent);
+    *remaining_decimals = buffer.length() - read_digits;
+  }
+}
+
+
+static bool DoubleStrtod(Vector<const char> trimmed,
+                         int exponent,
+                         double* result) {
+#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
+  // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
+  // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
+  // result is not accurate.
+  // We know that Windows32 uses 64 bits and is therefore accurate.
+  // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
+  // the same problem.
+  return false;
+#endif
+  if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
+    int read_digits;
+    // The trimmed input fits into a double.
+    // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
+    // can compute the result-double simply by multiplying (resp. dividing) the
+    // two numbers.
+    // This is possible because IEEE guarantees that floating-point operations
+    // return the best possible approximation.
+    if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
+      // 10^-exponent fits into a double.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result /= exact_powers_of_ten[-exponent];
+      return true;
+    }
+    if (0 <= exponent && exponent < kExactPowersOfTenSize) {
+      // 10^exponent fits into a double.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result *= exact_powers_of_ten[exponent];
+      return true;
+    }
+    int remaining_digits =
+        kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
+    if ((0 <= exponent) &&
+        (exponent - remaining_digits < kExactPowersOfTenSize)) {
+      // The trimmed string was short and we can multiply it with
+      // 10^remaining_digits. As a result the remaining exponent now fits
+      // into a double too.
+      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
+      ASSERT(read_digits == trimmed.length());
+      *result *= exact_powers_of_ten[remaining_digits];
+      *result *= exact_powers_of_ten[exponent - remaining_digits];
+      return true;
+    }
+  }
+  return false;
+}
+
+
+// Returns 10^exponent as an exact DiyFp.
+// The given exponent must be in the range [1; kDecimalExponentDistance[.
+static DiyFp AdjustmentPowerOfTen(int exponent) {
+  ASSERT(0 < exponent);
+  ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
+  // Simply hardcode the remaining powers for the given decimal exponent
+  // distance.
+  ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
+  switch (exponent) {
+    case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
+    case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
+    case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
+    case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
+    case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
+    case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
+    case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
+    default:
+      UNREACHABLE();
+      return DiyFp(0, 0);
+  }
+}
+
+
+// If the function returns true then the result is the correct double.
+// Otherwise it is either the correct double or the double that is just below
+// the correct double.
+static bool DiyFpStrtod(Vector<const char> buffer,
+                        int exponent,
+                        double* result) {
+  DiyFp input;
+  int remaining_decimals;
+  ReadDiyFp(buffer, &input, &remaining_decimals);
+  // Since we may have dropped some digits the input is not accurate.
+  // If remaining_decimals is different than 0 than the error is at most
+  // .5 ulp (unit in the last place).
+  // We don't want to deal with fractions and therefore keep a common
+  // denominator.
+  const int kDenominatorLog = 3;
+  const int kDenominator = 1 << kDenominatorLog;
+  // Move the remaining decimals into the exponent.
+  exponent += remaining_decimals;
+  int error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
+
+  int old_e = input.e();
+  input.Normalize();
+  error <<= old_e - input.e();
+
+  ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
+  if (exponent < PowersOfTenCache::kMinDecimalExponent) {
+    *result = 0.0;
+    return true;
+  }
+  DiyFp cached_power;
+  int cached_decimal_exponent;
+  PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
+                                                     &cached_power,
+                                                     &cached_decimal_exponent);
+
+  if (cached_decimal_exponent != exponent) {
+    int adjustment_exponent = exponent - cached_decimal_exponent;
+    DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
+    input.Multiply(adjustment_power);
+    if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
+      // The product of input with the adjustment power fits into a 64 bit
+      // integer.
+      ASSERT(DiyFp::kSignificandSize == 64);
+    } else {
+      // The adjustment power is exact. There is hence only an error of 0.5.
+      error += kDenominator / 2;
+    }
+  }
+
+  input.Multiply(cached_power);
+  // The error introduced by a multiplication of a*b equals
+  //   error_a + error_b + error_a*error_b/2^64 + 0.5
+  // Substituting a with 'input' and b with 'cached_power' we have
+  //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
+  //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
+  int error_b = kDenominator / 2;
+  int error_ab = (error == 0 ? 0 : 1);  // We round up to 1.
+  int fixed_error = kDenominator / 2;
+  error += error_b + error_ab + fixed_error;
+
+  old_e = input.e();
+  input.Normalize();
+  error <<= old_e - input.e();
+
+  // See if the double's significand changes if we add/subtract the error.
+  int order_of_magnitude = DiyFp::kSignificandSize + input.e();
+  int effective_significand_size =
+      Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
+  int precision_digits_count =
+      DiyFp::kSignificandSize - effective_significand_size;
+  if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
+    // This can only happen for very small denormals. In this case the
+    // half-way multiplied by the denominator exceeds the range of an uint64.
+    // Simply shift everything to the right.
+    int shift_amount = (precision_digits_count + kDenominatorLog) -
+        DiyFp::kSignificandSize + 1;
+    input.set_f(input.f() >> shift_amount);
+    input.set_e(input.e() + shift_amount);
+    // We add 1 for the lost precision of error, and kDenominator for
+    // the lost precision of input.f().
+    error = (error >> shift_amount) + 1 + kDenominator;
+    precision_digits_count -= shift_amount;
+  }
+  // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
+  ASSERT(DiyFp::kSignificandSize == 64);
+  ASSERT(precision_digits_count < 64);
+  uint64_t one64 = 1;
+  uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
+  uint64_t precision_bits = input.f() & precision_bits_mask;
+  uint64_t half_way = one64 << (precision_digits_count - 1);
+  precision_bits *= kDenominator;
+  half_way *= kDenominator;
+  DiyFp rounded_input(input.f() >> precision_digits_count,
+                      input.e() + precision_digits_count);
+  if (precision_bits >= half_way + error) {
+    rounded_input.set_f(rounded_input.f() + 1);
+  }
+  // If the last_bits are too close to the half-way case than we are too
+  // inaccurate and round down. In this case we return false so that we can
+  // fall back to a more precise algorithm.
+
+  *result = Double(rounded_input).value();
+  if (half_way - error < precision_bits && precision_bits < half_way + error) {
+    // Too imprecise. The caller will have to fall back to a slower version.
+    // However the returned number is guaranteed to be either the correct
+    // double, or the next-lower double.
+    return false;
+  } else {
+    return true;
+  }
+}
+
+
+// Returns
+//   - -1 if buffer*10^exponent < diy_fp.
+//   -  0 if buffer*10^exponent == diy_fp.
+//   - +1 if buffer*10^exponent > diy_fp.
+// Preconditions:
+//   buffer.length() + exponent <= kMaxDecimalPower + 1
+//   buffer.length() + exponent > kMinDecimalPower
+//   buffer.length() <= kMaxDecimalSignificantDigits
+static int CompareBufferWithDiyFp(Vector<const char> buffer,
+                                  int exponent,
+                                  DiyFp diy_fp) {
+  ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
+  ASSERT(buffer.length() + exponent > kMinDecimalPower);
+  ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
+  // Make sure that the Bignum will be able to hold all our numbers.
+  // Our Bignum implementation has a separate field for exponents. Shifts will
+  // consume at most one bigit (< 64 bits).
+  // ln(10) == 3.3219...
+  ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
+  Bignum buffer_bignum;
+  Bignum diy_fp_bignum;
+  buffer_bignum.AssignDecimalString(buffer);
+  diy_fp_bignum.AssignUInt64(diy_fp.f());
+  if (exponent >= 0) {
+    buffer_bignum.MultiplyByPowerOfTen(exponent);
+  } else {
+    diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
+  }
+  if (diy_fp.e() > 0) {
+    diy_fp_bignum.ShiftLeft(diy_fp.e());
+  } else {
+    buffer_bignum.ShiftLeft(-diy_fp.e());
+  }
+  return Bignum::Compare(buffer_bignum, diy_fp_bignum);
+}
+
+
+// Returns true if the guess is the correct double.
+// Returns false, when guess is either correct or the next-lower double.
+static bool ComputeGuess(Vector<const char> trimmed, int exponent,
+                         double* guess) {
+  if (trimmed.length() == 0) {
+    *guess = 0.0;
+    return true;
+  }
+  if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
+    *guess = Double::Infinity();
+    return true;
+  }
+  if (exponent + trimmed.length() <= kMinDecimalPower) {
+    *guess = 0.0;
+    return true;
+  }
+
+  if (DoubleStrtod(trimmed, exponent, guess) ||
+      DiyFpStrtod(trimmed, exponent, guess)) {
+    return true;
+  }
+  if (*guess == Double::Infinity()) {
+    return true;
+  }
+  return false;
+}
+
+double Strtod(Vector<const char> buffer, int exponent) {
+  char copy_buffer[kMaxSignificantDecimalDigits];
+  Vector<const char> trimmed;
+  int updated_exponent;
+  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+             &trimmed, &updated_exponent);
+  exponent = updated_exponent;
+
+  double guess;
+  bool is_correct = ComputeGuess(trimmed, exponent, &guess);
+  if (is_correct) return guess;
+
+  DiyFp upper_boundary = Double(guess).UpperBoundary();
+  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+  if (comparison < 0) {
+    return guess;
+  } else if (comparison > 0) {
+    return Double(guess).NextDouble();
+  } else if ((Double(guess).Significand() & 1) == 0) {
+    // Round towards even.
+    return guess;
+  } else {
+    return Double(guess).NextDouble();
+  }
+}
+
+float Strtof(Vector<const char> buffer, int exponent) {
+  char copy_buffer[kMaxSignificantDecimalDigits];
+  Vector<const char> trimmed;
+  int updated_exponent;
+  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
+             &trimmed, &updated_exponent);
+  exponent = updated_exponent;
+
+  double double_guess;
+  bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
+
+  float float_guess = static_cast<float>(double_guess);
+  if (float_guess == double_guess) {
+    // This shortcut triggers for integer values.
+    return float_guess;
+  }
+
+  // We must catch double-rounding. Say the double has been rounded up, and is
+  // now a boundary of a float, and rounds up again. This is why we have to
+  // look at previous too.
+  // Example (in decimal numbers):
+  //    input: 12349
+  //    high-precision (4 digits): 1235
+  //    low-precision (3 digits):
+  //       when read from input: 123
+  //       when rounded from high precision: 124.
+  // To do this we simply look at the neigbors of the correct result and see
+  // if they would round to the same float. If the guess is not correct we have
+  // to look at four values (since two different doubles could be the correct
+  // double).
+
+  double double_next = Double(double_guess).NextDouble();
+  double double_previous = Double(double_guess).PreviousDouble();
+
+  float f1 = static_cast<float>(double_previous);
+  float f2 = float_guess;
+  float f3 = static_cast<float>(double_next);
+  float f4;
+  if (is_correct) {
+    f4 = f3;
+  } else {
+    double double_next2 = Double(double_next).NextDouble();
+    f4 = static_cast<float>(double_next2);
+  }
+  ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
+
+  // If the guess doesn't lie near a single-precision boundary we can simply
+  // return its float-value.
+  if (f1 == f4) {
+    return float_guess;
+  }
+
+  ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
+         (f1 == f2 && f2 != f3 && f3 == f4) ||
+         (f1 == f2 && f2 == f3 && f3 != f4));
+
+  // guess and next are the two possible canditates (in the same way that
+  // double_guess was the lower candidate for a double-precision guess).
+  float guess = f1;
+  float next = f4;
+  DiyFp upper_boundary;
+  if (guess == 0.0f) {
+    float min_float = 1e-45f;
+    upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
+  } else {
+    upper_boundary = Single(guess).UpperBoundary();
+  }
+  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
+  if (comparison < 0) {
+    return guess;
+  } else if (comparison > 0) {
+    return next;
+  } else if ((Single(guess).Significand() & 1) == 0) {
+    // Round towards even.
+    return guess;
+  } else {
+    return next;
+  }
+}
+
+}  // namespace double_conversion
diff --git a/klm/util/double-conversion/strtod.h b/klm/util/double-conversion/strtod.h
new file mode 100644
index 00000000..ed0293b8
--- /dev/null
+++ b/klm/util/double-conversion/strtod.h
@@ -0,0 +1,45 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_STRTOD_H_
+#define DOUBLE_CONVERSION_STRTOD_H_
+
+#include "utils.h"
+
+namespace double_conversion {
+
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+double Strtod(Vector<const char> buffer, int exponent);
+
+// The buffer must only contain digits in the range [0-9]. It must not
+// contain a dot or a sign. It must not start with '0', and must not be empty.
+float Strtof(Vector<const char> buffer, int exponent);
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_STRTOD_H_
diff --git a/klm/util/double-conversion/utils.h b/klm/util/double-conversion/utils.h
new file mode 100644
index 00000000..767094b8
--- /dev/null
+++ b/klm/util/double-conversion/utils.h
@@ -0,0 +1,313 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef DOUBLE_CONVERSION_UTILS_H_
+#define DOUBLE_CONVERSION_UTILS_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <assert.h>
+#ifndef ASSERT
+#define ASSERT(condition)      (assert(condition))
+#endif
+#ifndef UNIMPLEMENTED
+#define UNIMPLEMENTED() (abort())
+#endif
+#ifndef UNREACHABLE
+#define UNREACHABLE()   (abort())
+#endif
+
+// Double operations detection based on target architecture.
+// Linux uses a 80bit wide floating point stack on x86. This induces double
+// rounding, which in turn leads to wrong results.
+// An easy way to test if the floating-point operations are correct is to
+// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
+// the result is equal to 89255e-22.
+// The best way to test this, is to create a division-function and to compare
+// the output of the division with the expected result. (Inlining must be
+// disabled.)
+// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
+#if defined(_M_X64) || defined(__x86_64__) || \
+    defined(__ARMEL__) || defined(__avr32__) || \
+    defined(__hppa__) || defined(__ia64__) || \
+    defined(__mips__) || defined(__powerpc__) || \
+    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
+    defined(__SH4__) || defined(__alpha__) || \
+    defined(_MIPS_ARCH_MIPS32R2)
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
+#if defined(_WIN32)
+// Windows uses a 64bit wide floating point stack.
+#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
+#else
+#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
+#endif  // _WIN32
+#else
+#error Target architecture was not detected as supported by Double-Conversion.
+#endif
+
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t;  // NOLINT
+typedef unsigned short uint16_t;  // NOLINT
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+// intptr_t and friends are defined in crtdefs.h through stdio.h.
+
+#else
+
+#include <stdint.h>
+
+#endif
+
+// The following macro works on both 32 and 64-bit platforms.
+// Usage: instead of writing 0x1234567890123456
+//      write UINT64_2PART_C(0x12345678,90123456);
+#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
+
+
+// The expression ARRAY_SIZE(a) is a compile-time constant of type
+// size_t which represents the number of elements of the given
+// array. You should only use ARRAY_SIZE on statically allocated
+// arrays.
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a)                                   \
+  ((sizeof(a) / sizeof(*(a))) /                         \
+  static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
+#endif
+
+// A macro to disallow the evil copy constructor and operator= functions
+// This should be used in the private: declarations for a class
+#ifndef DISALLOW_COPY_AND_ASSIGN
+#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
+  TypeName(const TypeName&);                    \
+  void operator=(const TypeName&)
+#endif
+
+// A macro to disallow all the implicit constructors, namely the
+// default constructor, copy constructor and operator= functions.
+//
+// This should be used in the private: declarations for a class
+// that wants to prevent anyone from instantiating it. This is
+// especially useful for classes containing only static methods.
+#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
+#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
+  TypeName();                                    \
+  DISALLOW_COPY_AND_ASSIGN(TypeName)
+#endif
+
+namespace double_conversion {
+
+static const int kCharSize = sizeof(char);
+
+// Returns the maximum of the two parameters.
+template <typename T>
+static T Max(T a, T b) {
+  return a < b ? b : a;
+}
+
+
+// Returns the minimum of the two parameters.
+template <typename T>
+static T Min(T a, T b) {
+  return a < b ? a : b;
+}
+
+
+inline int StrLength(const char* string) {
+  size_t length = strlen(string);
+  ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
+  return static_cast<int>(length);
+}
+
+// This is a simplified version of V8's Vector class.
+template <typename T>
+class Vector {
+ public:
+  Vector() : start_(NULL), length_(0) {}
+  Vector(T* data, int length) : start_(data), length_(length) {
+    ASSERT(length == 0 || (length > 0 && data != NULL));
+  }
+
+  // Returns a vector using the same backing storage as this one,
+  // spanning from and including 'from', to but not including 'to'.
+  Vector<T> SubVector(int from, int to) {
+    ASSERT(to <= length_);
+    ASSERT(from < to);
+    ASSERT(0 <= from);
+    return Vector<T>(start() + from, to - from);
+  }
+
+  // Returns the length of the vector.
+  int length() const { return length_; }
+
+  // Returns whether or not the vector is empty.
+  bool is_empty() const { return length_ == 0; }
+
+  // Returns the pointer to the start of the data in the vector.
+  T* start() const { return start_; }
+
+  // Access individual vector elements - checks bounds in debug mode.
+  T& operator[](int index) const {
+    ASSERT(0 <= index && index < length_);
+    return start_[index];
+  }
+
+  T& first() { return start_[0]; }
+
+  T& last() { return start_[length_ - 1]; }
+
+ private:
+  T* start_;
+  int length_;
+};
+
+
+// Helper class for building result strings in a character buffer. The
+// purpose of the class is to use safe operations that checks the
+// buffer bounds on all operations in debug mode.
+class StringBuilder {
+ public:
+  StringBuilder(char* buffer, int size)
+      : buffer_(buffer, size), position_(0) { }
+
+  ~StringBuilder() { if (!is_finalized()) Finalize(); }
+
+  int size() const { return buffer_.length(); }
+
+  // Get the current position in the builder.
+  int position() const {
+    ASSERT(!is_finalized());
+    return position_;
+  }
+
+  // Reset the position.
+  void Reset() { position_ = 0; }
+
+  // Add a single character to the builder. It is not allowed to add
+  // 0-characters; use the Finalize() method to terminate the string
+  // instead.
+  void AddCharacter(char c) {
+    ASSERT(c != '\0');
+    ASSERT(!is_finalized() && position_ < buffer_.length());
+    buffer_[position_++] = c;
+  }
+
+  // Add an entire string to the builder. Uses strlen() internally to
+  // compute the length of the input string.
+  void AddString(const char* s) {
+    AddSubstring(s, StrLength(s));
+  }
+
+  // Add the first 'n' characters of the given string 's' to the
+  // builder. The input string must have enough characters.
+  void AddSubstring(const char* s, int n) {
+    ASSERT(!is_finalized() && position_ + n < buffer_.length());
+    ASSERT(static_cast<size_t>(n) <= strlen(s));
+    memmove(&buffer_[position_], s, n * kCharSize);
+    position_ += n;
+  }
+
+
+  // Add character padding to the builder. If count is non-positive,
+  // nothing is added to the builder.
+  void AddPadding(char c, int count) {
+    for (int i = 0; i < count; i++) {
+      AddCharacter(c);
+    }
+  }
+
+  // Finalize the string by 0-terminating it and returning the buffer.
+  char* Finalize() {
+    ASSERT(!is_finalized() && position_ < buffer_.length());
+    buffer_[position_] = '\0';
+    // Make sure nobody managed to add a 0-character to the
+    // buffer while building the string.
+    ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
+    position_ = -1;
+    ASSERT(is_finalized());
+    return buffer_.start();
+  }
+
+ private:
+  Vector<char> buffer_;
+  int position_;
+
+  bool is_finalized() const { return position_ < 0; }
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
+};
+
+// The type-based aliasing rule allows the compiler to assume that pointers of
+// different types (for some definition of different) never alias each other.
+// Thus the following code does not work:
+//
+// float f = foo();
+// int fbits = *(int*)(&f);
+//
+// The compiler 'knows' that the int pointer can't refer to f since the types
+// don't match, so the compiler may cache f in a register, leaving random data
+// in fbits.  Using C++ style casts makes no difference, however a pointer to
+// char data is assumed to alias any other pointer.  This is the 'memcpy
+// exception'.
+//
+// Bit_cast uses the memcpy exception to move the bits from a variable of one
+// type of a variable of another type.  Of course the end result is likely to
+// be implementation dependent.  Most compilers (gcc-4.2 and MSVC 2005)
+// will completely optimize BitCast away.
+//
+// There is an additional use for BitCast.
+// Recent gccs will warn when they see casts that may result in breakage due to
+// the type-based aliasing rule.  If you have checked that there is no breakage
+// you can use BitCast to cast one pointer type to another.  This confuses gcc
+// enough that it can no longer see that you have cast one pointer type to
+// another thus avoiding the warning.
+template <class Dest, class Source>
+inline Dest BitCast(const Source& source) {
+  // Compile time assertion: sizeof(Dest) == sizeof(Source)
+  // A compile error here means your Dest and Source have different sizes.
+  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1];
+
+  Dest dest;
+  memmove(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+template <class Dest, class Source>
+inline Dest BitCast(Source* source) {
+  return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
+}
+
+}  // namespace double_conversion
+
+#endif  // DOUBLE_CONVERSION_UTILS_H_
diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc
index eb635ad8..498ab5c5 100644
--- a/klm/util/ersatz_progress.cc
+++ b/klm/util/ersatz_progress.cc
@@ -9,6 +9,8 @@ namespace util {
 
 namespace { const unsigned char kWidth = 100; }
 
+const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+
 ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
 
 ErsatzProgress::~ErsatzProgress() {
@@ -22,7 +24,7 @@ ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::s
     return;
   }
   if (!message.empty()) *out_ << message << '\n';
-  *out_ << "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+  *out_ << kProgressBanner;
 }
 
 void ErsatzProgress::Milestone() {
@@ -38,7 +40,7 @@ void ErsatzProgress::Milestone() {
     next_ = std::numeric_limits<uint64_t>::max();
     out_ = NULL;
   } else {
-    next_ = std::max(next_, (stone * complete_) / kWidth);
+    next_ = std::max(next_, ((stone + 1) * complete_ + kWidth - 1) / kWidth);
   }
 }
 
diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh
index 9909736d..b94399a8 100644
--- a/klm/util/ersatz_progress.hh
+++ b/klm/util/ersatz_progress.hh
@@ -10,6 +10,9 @@
 // boost.  Also adds option to print nothing.  
 
 namespace util {
+
+extern const char kProgressBanner[];
+
 class ErsatzProgress {
   public:
     // No output.  
@@ -32,7 +35,6 @@ class ErsatzProgress {
 
     void Set(uint64_t to) {
       if ((current_ = to) >= next_) Milestone();
-      Milestone();
     }
 
     void Finished() {
diff --git a/klm/util/exception.cc b/klm/util/exception.cc
index 3806e6de..557c3986 100644
--- a/klm/util/exception.cc
+++ b/klm/util/exception.cc
@@ -79,11 +79,6 @@ ErrnoException::ErrnoException() throw() : errno_(errno) {
 
 ErrnoException::~ErrnoException() throw() {}
 
-EndOfFileException::EndOfFileException() throw() {
-  *this << "End of file";
-}
-EndOfFileException::~EndOfFileException() throw() {}
-
 OverflowException::OverflowException() throw() {}
 OverflowException::~OverflowException() throw() {}
 
diff --git a/klm/util/exception.hh b/klm/util/exception.hh
index 0165a7a3..74046cf9 100644
--- a/klm/util/exception.hh
+++ b/klm/util/exception.hh
@@ -44,7 +44,7 @@ class Exception : public std::exception {
 };
 
 /* This implements the normal operator<< for Exception and all its children. 
- * SNIFAE means it only applies to Exception.  Think of this as an ersatz
+ * SFINAE means it only applies to Exception.  Think of this as an ersatz
  * boost::enable_if.  
  */
 template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
@@ -62,30 +62,26 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
 #endif
 #endif
 
-#define UTIL_SET_LOCATION(UTIL_e, child, condition) do { \
-  (UTIL_e).SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, (child), (condition)); \
-} while (0)
-
 /* Create an instance of Exception, add the message Modify, and throw it.
  * Modify is appended to the what() message and can contain << for ostream
  * operations.  
  *
  * do .. while kludge to swallow trailing ; character
  * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .  
+ * Arg can be a constructor argument to the exception.
  */
-#define UTIL_THROW(Exception, Modify) do { \
-  Exception UTIL_e; \
-  UTIL_SET_LOCATION(UTIL_e, #Exception, NULL); \
+#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
+  Exception UTIL_e Arg; \
+  UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
   UTIL_e << Modify; \
   throw UTIL_e; \
 } while (0)
 
-#define UTIL_THROW_VAR(Var, Modify) do { \
-  Exception &UTIL_e = (Var); \
-  UTIL_SET_LOCATION(UTIL_e, NULL, NULL); \
-  UTIL_e << Modify; \
-  throw UTIL_e; \
-} while (0)
+#define UTIL_THROW_ARG(Exception, Arg, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
+
+#define UTIL_THROW(Exception, Modify) \
+  UTIL_THROW_BACKEND(NULL, Exception, , Modify);
 
 #if __GNUC__ >= 3
 #define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
@@ -93,15 +89,16 @@ template <class Except, class Data> typename Except::template ExceptionTag<Excep
 #define UTIL_UNLIKELY(x) (x)
 #endif
 
-#define UTIL_THROW_IF(Condition, Exception, Modify) do { \
+#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
   if (UTIL_UNLIKELY(Condition)) { \
-    Exception UTIL_e; \
-    UTIL_SET_LOCATION(UTIL_e, #Exception, #Condition); \
-    UTIL_e << Modify; \
-    throw UTIL_e; \
+    UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
   } \
 } while (0)
 
+#define UTIL_THROW_IF(Condition, Exception, Modify) \
+  UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
+
+// Exception that records errno and adds it to the message.
 class ErrnoException : public Exception {
   public:
     ErrnoException() throw();
@@ -114,12 +111,7 @@ class ErrnoException : public Exception {
     int errno_;
 };
 
-class EndOfFileException : public Exception {
-  public:
-    EndOfFileException() throw();
-    ~EndOfFileException() throw();
-};
-
+// Utilities for overflow checking.  
 class OverflowException : public Exception {
   public:
     OverflowException() throw();
diff --git a/klm/util/file.cc b/klm/util/file.cc
index b9a77cf9..9a6d2e64 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -1,12 +1,17 @@
+#define _LARGEFILE64_SOURCE
+#define _FILE_OFFSET_BITS 64
+
 #include "util/file.hh"
 
 #include "util/exception.hh"
 
 #include <cstdlib>
 #include <cstdio>
+#include <sstream>
 #include <iostream>
 
 #include <assert.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
@@ -37,6 +42,18 @@ scoped_FILE::~scoped_FILE() {
   }
 }
 
+// Note that ErrnoException records errno before NameFromFD is called.
+FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
+  *this << "in " << name_guess_ << ' ';
+}
+
+FDException::~FDException() throw() {}
+
+EndOfFileException::EndOfFileException() throw() {
+  *this << "End of file";
+}
+EndOfFileException::~EndOfFileException() throw() {}
+
 int OpenReadOrThrow(const char *name) {
   int ret;
 #if defined(_WIN32) || defined(_WIN64)
@@ -61,19 +78,36 @@ uint64_t SizeFile(int fd) {
 #if defined(_WIN32) || defined(_WIN64)
   __int64 ret = _filelengthi64(fd);
   return (ret == -1) ? kBadSize : ret;
+#else // Not windows.
+
+#ifdef OS_ANDROID
+  struct stat64 sb;
+  int ret = fstat64(fd, &sb);
 #else
   struct stat sb;
-  if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
+  int ret = fstat(fd, &sb);
+#endif
+  if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
   return sb.st_size;
 #endif
 }
 
+uint64_t SizeOrThrow(int fd) {
+  uint64_t ret = SizeFile(fd);
+  UTIL_THROW_IF_ARG(ret == kBadSize, FDException, (fd), "Failed to size");
+  return ret;
+}
+
 void ResizeOrThrow(int fd, uint64_t to) {
+  UTIL_THROW_IF_ARG(
 #if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW_IF(_chsize_s(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
+    _chsize_s
+#elif defined(OS_ANDROID)
+    ftruncate64
 #else
-  UTIL_THROW_IF(ftruncate(fd, to), ErrnoException, "Resizing to " << to << " bytes failed");
+    ftruncate
 #endif
+    (fd, to), FDException, (fd), "while resizing to " << to << " bytes");
 }
 
 std::size_t PartialRead(int fd, void *to, std::size_t amount) {
@@ -81,9 +115,13 @@ std::size_t PartialRead(int fd, void *to, std::size_t amount) {
   amount = min(static_cast<std::size_t>(INT_MAX), amount);
   int ret = _read(fd, to, amount); 
 #else
-  ssize_t ret = read(fd, to, amount);
+  errno = 0;
+  ssize_t ret;
+  do {
+    ret = read(fd, to, amount);
+  } while (ret == -1 && errno == EINTR);
 #endif
-  UTIL_THROW_IF(ret < 0, ErrnoException, "Reading " << amount << " from fd " << fd << " failed.");
+  UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
   return static_cast<std::size_t>(ret);
 }
 
@@ -91,7 +129,7 @@ void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
   uint8_t *to = static_cast<uint8_t*>(to_void);
   while (amount) {
     std::size_t ret = PartialRead(fd, to, amount);
-    UTIL_THROW_IF(ret == 0, EndOfFileException, " in fd " << fd << " but there should be " << amount << " more bytes to read.");
+    UTIL_THROW_IF(ret == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
     amount -= ret;
     to += ret;
   }
@@ -109,40 +147,86 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
   return amount;
 }
 
+void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
+  uint8_t *to = static_cast<uint8_t*>(to_void);
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW(Exception, "TODO: PReadOrThrow for windows using ReadFile http://stackoverflow.com/questions/766477/are-there-equivalents-to-pread-on-different-platforms");
+#else
+  for (;size ;) {
+    ssize_t ret;
+    errno = 0;
+    do {
+#ifdef OS_ANDROID
+      ret = pread64(fd, to, size, off);
+#else
+      ret = pread(fd, to, size, off);
+#endif
+    } while (ret == -1 && errno == EINTR);
+    if (ret <= 0) {
+      UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
+      UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
+    }
+    size -= ret;
+    off += ret;
+    to += ret;
+  }
+#endif
+}
+
 void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
   const uint8_t *data = static_cast<const uint8_t*>(data_void);
   while (size) {
 #if defined(_WIN32) || defined(_WIN64)
     int ret = write(fd, data, min(static_cast<std::size_t>(INT_MAX), size));
 #else
-    ssize_t ret = write(fd, data, size);
+    errno = 0;
+    ssize_t ret;
+    do {
+      ret = write(fd, data, size);
+    } while (ret == -1 && errno == EINTR);
 #endif
-    if (ret < 1) UTIL_THROW(util::ErrnoException, "Write failed");
+    UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
     data += ret;
     size -= ret;
   }
 }
 
 void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
-  assert(size);
-  UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), util::ErrnoException, "Short write; requested size " << size);
+  if (!size) return;
+  UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
 }
 
 void FSyncOrThrow(int fd) {
 // Apparently windows doesn't have fsync?  
 #if !defined(_WIN32) && !defined(_WIN64)
-  UTIL_THROW_IF(-1 == fsync(fd), ErrnoException, "Sync of " << fd << " failed.");
+  UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
 #endif
 }
 
 namespace {
+
+// Static assert for 64-bit off_t size.
+#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
+template <unsigned> struct CheckOffT;
+template <> struct CheckOffT<8> {
+  struct True {};
+};
+// If there's a compiler error on the next line, then off_t isn't 64 bit.  And
+// that makes me a sad panda.
+typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
+#endif
+
+// Can't we all just get along?  
 void InternalSeek(int fd, int64_t off, int whence) {
+  UTIL_THROW_IF_ARG(
 #if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed");
-
+    (__int64)-1 == _lseeki64(fd, off, whence),
+#elif defined(OS_ANDROID)
+    (off64_t)-1 == lseek64(fd, off, whence),
 #else
-  UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed");
+    (off_t)-1 == lseek(fd, off, whence),
 #endif
+    FDException, (fd), "while seeking to " << off << " whence " << whence);
 }
 } // namespace
 
@@ -160,22 +244,18 @@ void SeekEnd(int fd) {
 
 std::FILE *FDOpenOrThrow(scoped_fd &file) {
   std::FILE *ret = fdopen(file.get(), "r+b");
-  if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen descriptor " << file.get());
+  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for write");
   file.release();
   return ret;
 }
 
 std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
   std::FILE *ret = fdopen(file.get(), "rb");
-  if (!ret) UTIL_THROW(util::ErrnoException, "Could not fdopen descriptor " << file.get());
+  UTIL_THROW_IF_ARG(!ret, FDException, (file.get()), "Could not fdopen for read");
   file.release();
   return ret;
 }
 
-TempMaker::TempMaker(const std::string &prefix) : base_(prefix) {
-  base_ += "XXXXXX";
-}
-
 // Sigh.  Windows temporary file creation is full of race conditions.
 #if defined(_WIN32) || defined(_WIN64)
 /* mkstemp extracted from libc/sysdeps/posix/tempname.c.  Copyright
@@ -292,23 +372,87 @@ int
 mkstemp_and_unlink(char *tmpl) {
   int ret = mkstemp(tmpl);
   if (ret != -1) {
-    UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
+    UTIL_THROW_IF(unlink(tmpl), ErrnoException, "while deleting delete " << tmpl);
   }
   return ret;
 }
 #endif
 
-int TempMaker::Make() const {
-  std::string name(base_);
+// If it's a directory, add a /.  This lets users say -T /tmp without creating
+// /tmpAAAAAA
+void NormalizeTempPrefix(std::string &base) {
+  if (base.empty()) return;
+  if (base[base.size() - 1] == '/') return;
+  struct stat sb;
+  // It's fine for it to not exist.
+  if (-1 == stat(base.c_str(), &sb)) return;
+  if (S_ISDIR(sb.st_mode)) base += '/';
+}
+
+int MakeTemp(const std::string &base) {
+  std::string name(base);
+  name += "XXXXXX";
   name.push_back(0);
   int ret;
-  UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), util::ErrnoException, "Failed to make a temporary based on " << base_);
+  UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), ErrnoException, "while making a temporary based on " << base);
   return ret;
 }
 
-std::FILE *TempMaker::MakeFile() const {
-  util::scoped_fd file(Make());
+std::FILE *FMakeTemp(const std::string &base) {
+  util::scoped_fd file(MakeTemp(base));
   return FDOpenOrThrow(file);
 }
 
+int DupOrThrow(int fd) {
+  int ret = dup(fd);
+  UTIL_THROW_IF_ARG(ret == -1, FDException, (fd), "in duplicating the file descriptor");
+  return ret;
+}
+
+namespace {
+// Try to name things but be willing to fail too.
+bool TryName(int fd, std::string &out) {
+#if defined(_WIN32) || defined(_WIN64)
+  return false;
+#else
+  std::string name("/proc/self/fd/");
+  std::ostringstream convert;
+  convert << fd;
+  name += convert.str();
+  
+  struct stat sb;
+  if (-1 == lstat(name.c_str(), &sb)) 
+    return false;
+  out.resize(sb.st_size + 1);
+  ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
+  if (-1 == ret)
+    return false;
+  if (ret > sb.st_size) {
+    // Increased in size?!
+    return false;
+  }
+  out.resize(ret);
+  // Don't use the non-file names.
+  if (!out.empty() && out[0] != '/') 
+    return false;
+  return true;
+#endif
+}
+} // namespace
+
+std::string NameFromFD(int fd) {
+  std::string ret;
+  if (TryName(fd, ret)) return ret;
+  switch (fd) {
+    case 0: return "stdin";
+    case 1: return "stdout";
+    case 2: return "stderr";
+  }
+  ret = "fd ";
+  std::ostringstream convert;
+  convert << fd;
+  ret += convert.str();
+  return ret;
+}
+
 } // namespace util
diff --git a/klm/util/file.hh b/klm/util/file.hh
index c24580d6..be88431d 100644
--- a/klm/util/file.hh
+++ b/klm/util/file.hh
@@ -1,6 +1,8 @@
 #ifndef UTIL_FILE__
 #define UTIL_FILE__
 
+#include "util/exception.hh"
+
 #include <cstddef>
 #include <cstdio>
 #include <string>
@@ -17,7 +19,7 @@ class scoped_fd {
 
     ~scoped_fd();
 
-    void reset(int to) {
+    void reset(int to = -1) {
       scoped_fd other(fd_);
       fd_ = to;
     }
@@ -63,6 +65,32 @@ class scoped_FILE {
     std::FILE *file_;
 };
 
+/* Thrown for any operation where the fd is known. */
+class FDException : public ErrnoException {
+  public:
+    explicit FDException(int fd) throw();
+
+    virtual ~FDException() throw();
+
+    // This may no longer be valid if the exception was thrown past open.
+    int FD() const { return fd_; }
+
+    // Guess from NameFromFD.
+    const std::string &NameGuess() const { return name_guess_; }
+
+  private:
+    int fd_;
+
+    std::string name_guess_;
+};
+
+// End of file reached.
+class EndOfFileException : public Exception {
+  public:
+    EndOfFileException() throw();
+    ~EndOfFileException() throw();
+};
+
 // Open for read only.  
 int OpenReadOrThrow(const char *name);
 // Create file if it doesn't exist, truncate if it does.  Opened for write.   
@@ -71,12 +99,15 @@ int CreateOrThrow(const char *name);
 // Return value for SizeFile when it can't size properly.  
 const uint64_t kBadSize = (uint64_t)-1;
 uint64_t SizeFile(int fd);
+uint64_t SizeOrThrow(int fd);
 
 void ResizeOrThrow(int fd, uint64_t to);
 
 std::size_t PartialRead(int fd, void *to, std::size_t size);
 void ReadOrThrow(int fd, void *to, std::size_t size);
 std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
+// Positioned: unix only for now.  
+void PReadOrThrow(int fd, void *to, std::size_t size, uint64_t off);
 
 void WriteOrThrow(int fd, const void *data_void, std::size_t size);
 void WriteOrThrow(FILE *to, const void *data, std::size_t size);
@@ -91,17 +122,20 @@ void SeekEnd(int fd);
 std::FILE *FDOpenOrThrow(scoped_fd &file);
 std::FILE *FDOpenReadOrThrow(scoped_fd &file);
 
-class TempMaker {
-  public:
-    explicit TempMaker(const std::string &prefix);
+// Temporary files
+// Append a / if base is a directory.
+void NormalizeTempPrefix(std::string &base);
+int MakeTemp(const std::string &prefix);
+std::FILE *FMakeTemp(const std::string &prefix);
 
-    // These will already be unlinked for you.  
-    int Make() const;
-    std::FILE *MakeFile() const;
+// dup an fd.
+int DupOrThrow(int fd);
 
-  private:
-    std::string base_;
-};
+/* Attempt get file name from fd.  This won't always work (i.e. on Windows or
+ * a pipe).  The file might have been renamed.  It's intended for diagnostics
+ * and logging only.
+ */
+std::string NameFromFD(int fd);
 
 } // namespace util
 
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index 5a208eff..fbfa0e0e 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -1,13 +1,15 @@
 #include "util/file_piece.hh"
 
+#include "util/double-conversion/double-conversion.h"
 #include "util/exception.hh"
 #include "util/file.hh"
 #include "util/mmap.hh"
-#ifdef WIN32
+
+#if defined(_WIN32) || defined(_WIN64)
 #include <io.h>
 #else
 #include <unistd.h>
-#endif // WIN32
+#endif
 
 #include <iostream>
 #include <string>
@@ -34,10 +36,17 @@ FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t
   Initialize(name, show_progress, min_buffer);
 }
 
-FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer)  : 
+namespace {
+std::string NamePossiblyFind(int fd, const char *name) {
+  if (name) return name;
+  return NameFromFD(fd);
+}
+} // namespace
+
+FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : 
   file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
-  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
-  Initialize(name, show_progress, min_buffer);
+  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
+  Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
 }
 
 FilePiece::~FilePiece() {}
@@ -103,21 +112,33 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
 }
 
 namespace {
-void ParseNumber(const char *begin, char *&end, float &out) {
-#if defined(sun) || defined(WIN32)
-  out = static_cast<float>(strtod(begin, &end));
-#else
-  out = strtof(begin, &end);
-#endif
+
+static const double_conversion::StringToDoubleConverter kConverter(
+    double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
+    std::numeric_limits<double>::quiet_NaN(),
+    std::numeric_limits<double>::quiet_NaN(),
+    "inf",
+    "NaN");
+
+void ParseNumber(const char *begin, const char *&end, float &out) {
+  int count;
+  out = kConverter.StringToFloat(begin, end - begin, &count);
+  end = begin + count;
 }
-void ParseNumber(const char *begin, char *&end, double &out) {
-  out = strtod(begin, &end);
+void ParseNumber(const char *begin, const char *&end, double &out) {
+  int count;
+  out = kConverter.StringToDouble(begin, end - begin, &count);
+  end = begin + count;
 }
-void ParseNumber(const char *begin, char *&end, long int &out) {
-  out = strtol(begin, &end, 10);
+void ParseNumber(const char *begin, const char *&end, long int &out) {
+  char *silly_end;
+  out = strtol(begin, &silly_end, 10);
+  end = silly_end;
 }
-void ParseNumber(const char *begin, char *&end, unsigned long int &out) {
-  out = strtoul(begin, &end, 10);
+void ParseNumber(const char *begin, const char *&end, unsigned long int &out) {
+  char *silly_end;
+  out = strtoul(begin, &silly_end, 10);
+  end = silly_end;
 }
 } // namespace
 
@@ -127,16 +148,17 @@ template <class T> T FilePiece::ReadNumber() {
     if (at_end_) {
       // Hallucinate a null off the end of the file.
       std::string buffer(position_, position_end_);
-      char *end;
+      const char *buf = buffer.c_str();
+      const char *end = buf + buffer.size();
       T ret;
-      ParseNumber(buffer.c_str(), end, ret);
-      if (buffer.c_str() == end) throw ParseNumberException(buffer);
-      position_ += end - buffer.c_str();
+      ParseNumber(buf, end, ret);
+      if (buf == end) throw ParseNumberException(buffer);
+      position_ += end - buf;
       return ret;
     }
     Shift();
   }
-  char *end;
+  const char *end = last_space_;
   T ret;
   ParseNumber(position_, end, ret);
   if (end == position_) throw ParseNumberException(ReadDelimited());
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index 39bd1581..53310976 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -29,7 +29,7 @@ class FilePiece {
     // 1 MB default.
     explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
     // Takes ownership of fd.  name is used for messages.
-    explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+    explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
 
     ~FilePiece();
 
diff --git a/klm/util/file_piece_test.cc b/klm/util/file_piece_test.cc
index e79ece7a..91e4c559 100644
--- a/klm/util/file_piece_test.cc
+++ b/klm/util/file_piece_test.cc
@@ -1,6 +1,7 @@
 // Tests might fail if you have creative characters in your path.  Sue me.  
 #include "util/file_piece.hh"
 
+#include "util/file.hh"
 #include "util/scoped.hh"
 
 #define BOOST_TEST_MODULE FilePieceTest
diff --git a/klm/util/have.hh b/klm/util/have.hh
index 85b838e4..1523c0c5 100644
--- a/klm/util/have.hh
+++ b/klm/util/have.hh
@@ -10,8 +10,4 @@
 //#define HAVE_BOOST
 #endif
 
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
 #endif // UTIL_HAVE__
diff --git a/klm/util/multi_intersection.hh b/klm/util/multi_intersection.hh
new file mode 100644
index 00000000..8334d39d
--- /dev/null
+++ b/klm/util/multi_intersection.hh
@@ -0,0 +1,80 @@
+#ifndef UTIL_MULTI_INTERSECTION__
+#define UTIL_MULTI_INTERSECTION__
+
+#include <boost/optional.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+namespace util {
+
+namespace detail {
+template <class Range> struct RangeLessBySize : public std::binary_function<const Range &, const Range &, bool> {
+  bool operator()(const Range &left, const Range &right) const {
+    return left.size() < right.size();
+  }
+};
+
+/* Takes sets specified by their iterators and a boost::optional containing
+ * the lowest intersection if any.  Each set must be sorted in increasing
+ * order.  sets is changed to truncate the beginning of each sequence to the
+ * location of the match or an empty set.  Precondition: sets is not empty
+ * since the intersection over null is the universe and this function does not
+ * know the universe.   
+ */
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersectionSorted(std::vector<boost::iterator_range<Iterator> > &sets, const Less &less = std::less<typename std::iterator_traits<Iterator>::value_type>()) {
+  typedef std::vector<boost::iterator_range<Iterator> > Sets;
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+
+  assert(!sets.empty());
+
+  if (sets.front().empty()) return boost::optional<Value>();
+  // Possibly suboptimal to copy for general Value; makes unsigned int go slightly faster.  
+  Value highest(sets.front().front());
+  for (typename Sets::iterator i(sets.begin()); i != sets.end(); ) {
+    i->advance_begin(std::lower_bound(i->begin(), i->end(), highest, less) - i->begin());
+    if (i->empty()) return boost::optional<Value>();
+    if (less(highest, i->front())) {
+      highest = i->front();
+      // start over
+      i = sets.begin();
+    } else {
+      ++i;
+    }
+  }
+  return boost::optional<Value>(highest);
+}
+
+} // namespace detail
+
+template <class Iterator, class Less> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets, const Less less) {
+  assert(!sets.empty());
+
+  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+  return detail::FirstIntersectionSorted(sets, less);
+}
+
+template <class Iterator> boost::optional<typename std::iterator_traits<Iterator>::value_type> FirstIntersection(std::vector<boost::iterator_range<Iterator> > &sets) {
+  return FirstIntersection(sets, std::less<typename std::iterator_traits<Iterator>::value_type>());
+}
+
+template <class Iterator, class Output, class Less> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out, const Less less) {
+  typedef typename std::iterator_traits<Iterator>::value_type Value;
+  assert(!sets.empty());
+
+  std::sort(sets.begin(), sets.end(), detail::RangeLessBySize<boost::iterator_range<Iterator> >());
+  boost::optional<Value> ret;
+  for (boost::optional<Value> ret; ret = detail::FirstIntersectionSorted(sets, less); sets.front().advance_begin(1)) {
+    out(*ret);
+  }
+}
+
+template <class Iterator, class Output> void AllIntersection(std::vector<boost::iterator_range<Iterator> > &sets, Output &out) {
+  AllIntersection(sets, out, std::less<typename std::iterator_traits<Iterator>::value_type>());
+}
+
+} // namespace util
+
+#endif // UTIL_MULTI_INTERSECTION__
diff --git a/klm/util/multi_intersection_test.cc b/klm/util/multi_intersection_test.cc
new file mode 100644
index 00000000..970afc17
--- /dev/null
+++ b/klm/util/multi_intersection_test.cc
@@ -0,0 +1,63 @@
+#include "util/multi_intersection.hh"
+
+#define BOOST_TEST_MODULE MultiIntersectionTest
+#include <boost/test/unit_test.hpp>
+
+namespace util {
+namespace {
+
+BOOST_AUTO_TEST_CASE(Empty) {
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  
+  sets.push_back(boost::iterator_range<const unsigned int*>(static_cast<const unsigned int*>(NULL), static_cast<const unsigned int*>(NULL)));
+  BOOST_CHECK(!FirstIntersection(sets));
+}
+
+BOOST_AUTO_TEST_CASE(Single) {
+  std::vector<unsigned int> nums;
+  nums.push_back(1);
+  nums.push_back(4);
+  nums.push_back(100);
+  std::vector<boost::iterator_range<std::vector<unsigned int>::const_iterator> > sets;
+  sets.push_back(nums);
+
+  boost::optional<unsigned int> ret(FirstIntersection(sets));
+
+  BOOST_REQUIRE(ret);
+  BOOST_CHECK_EQUAL(static_cast<unsigned int>(1), *ret);
+}
+
+template <class T, unsigned int len> boost::iterator_range<const T*> RangeFromArray(const T (&arr)[len]) {
+  return boost::iterator_range<const T*>(arr, arr + len);
+}
+
+BOOST_AUTO_TEST_CASE(MultiNone) {
+  unsigned int nums0[] = {1, 3, 4, 22};
+  unsigned int nums1[] = {2, 5, 12};
+  unsigned int nums2[] = {4, 17};
+
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  sets.push_back(RangeFromArray(nums0));
+  sets.push_back(RangeFromArray(nums1));
+  sets.push_back(RangeFromArray(nums2));
+
+  BOOST_CHECK(!FirstIntersection(sets));
+}
+
+BOOST_AUTO_TEST_CASE(MultiOne) {
+  unsigned int nums0[] = {1, 3, 4, 17, 22};
+  unsigned int nums1[] = {2, 5, 12, 17};
+  unsigned int nums2[] = {4, 17};
+
+  std::vector<boost::iterator_range<const unsigned int*> > sets;
+  sets.push_back(RangeFromArray(nums0));
+  sets.push_back(RangeFromArray(nums1));
+  sets.push_back(RangeFromArray(nums2));
+
+  boost::optional<unsigned int> ret(FirstIntersection(sets));
+  BOOST_REQUIRE(ret);
+  BOOST_CHECK_EQUAL(static_cast<unsigned int>(17), *ret);
+}
+
+} // namespace
+} // namespace util
diff --git a/klm/util/pcqueue.hh b/klm/util/pcqueue.hh
new file mode 100644
index 00000000..3df8749b
--- /dev/null
+++ b/klm/util/pcqueue.hh
@@ -0,0 +1,105 @@
+#ifndef UTIL_PCQUEUE__
+#define UTIL_PCQUEUE__
+
+#include <boost/interprocess/sync/interprocess_semaphore.hpp>
+#include <boost/scoped_array.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/utility.hpp>
+
+#include <errno.h>
+
+namespace util {
+
+inline void WaitSemaphore (boost::interprocess::interprocess_semaphore &on) {
+  while (1) {
+    try {
+      on.wait();
+      break;
+    }
+    catch (boost::interprocess::interprocess_exception &e) {
+      if (e.get_native_error() != EINTR) throw;
+    }
+  }
+}
+
+/* Producer consumer queue safe for multiple producers and multiple consumers.
+ * T must be default constructable and have operator=.  
+ * The value is copied twice for Consume(T &out) or three times for Consume(),
+ * so larger objects should be passed via pointer.
+ * Strong exception guarantee if operator= throws.  Undefined if semaphores throw.  
+ */
+template <class T> class PCQueue : boost::noncopyable {
+ public:
+  explicit PCQueue(size_t size)
+   : empty_(size), used_(0),
+     storage_(new T[size]),
+     end_(storage_.get() + size),
+     produce_at_(storage_.get()),
+     consume_at_(storage_.get()) {}
+
+  // Add a value to the queue.
+  void Produce(const T &val) {
+    WaitSemaphore(empty_);
+    {
+      boost::unique_lock<boost::mutex> produce_lock(produce_at_mutex_);
+      try {
+        *produce_at_ = val;
+      }
+      catch (...) {
+        empty_.post();
+        throw;
+      }
+      if (++produce_at_ == end_) produce_at_ = storage_.get();
+    }
+    used_.post();
+  }
+
+  // Consume a value, assigning it to out.
+  T& Consume(T &out) {
+    WaitSemaphore(used_);
+    {
+      boost::unique_lock<boost::mutex> consume_lock(consume_at_mutex_);
+      try {
+        out = *consume_at_;
+      }
+      catch (...) {
+        used_.post();
+        throw;
+      }
+      if (++consume_at_ == end_) consume_at_ = storage_.get();
+    }
+    empty_.post();
+    return out;
+  }
+
+  // Convenience version of Consume that copies the value to return.
+  // The other version is faster.
+  T Consume() {
+    T ret;
+    Consume(ret);
+    return ret;
+  }
+   
+ private:
+  // Number of empty spaces in storage_.
+  boost::interprocess::interprocess_semaphore empty_;
+  // Number of occupied spaces in storage_.
+  boost::interprocess::interprocess_semaphore used_;
+
+  boost::scoped_array<T> storage_;
+
+  T *const end_;
+
+  // Index for next write in storage_.
+  T *produce_at_;
+  boost::mutex produce_at_mutex_;
+
+  // Index for next read from storage_.
+  T *consume_at_;
+  boost::mutex consume_at_mutex_;
+
+};
+
+} // namespace util
+
+#endif // UTIL_PCQUEUE__
diff --git a/klm/util/pool.cc b/klm/util/pool.cc
index 2dffd06f..429ba158 100644
--- a/klm/util/pool.cc
+++ b/klm/util/pool.cc
@@ -1,5 +1,7 @@
 #include "util/pool.hh"
 
+#include "util/scoped.hh"
+
 #include <stdlib.h>
 
 namespace util {
@@ -24,8 +26,7 @@ void Pool::FreeAll() {
 
 void *Pool::More(std::size_t size) {
   std::size_t amount = std::max(static_cast<size_t>(32) << free_list_.size(), size);
-  uint8_t *ret = static_cast<uint8_t*>(malloc(amount));
-  if (!ret) throw std::bad_alloc();
+  uint8_t *ret = static_cast<uint8_t*>(MallocOrThrow(amount));
   free_list_.push_back(ret);
   current_ = ret + size;
   current_end_ = ret + amount;
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 4a8aff35..6780489d 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -126,6 +126,11 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
       }    
     }
 
+    void Clear(Entry invalid) {
+      std::fill(begin_, end_, invalid);
+      entries_ = 0;
+    }
+
   private:
     MutableIterator begin_;
     std::size_t buckets_;
diff --git a/klm/util/read_compressed.cc b/klm/util/read_compressed.cc
index 4ec94c4e..7a1a8fb5 100644
--- a/klm/util/read_compressed.cc
+++ b/klm/util/read_compressed.cc
@@ -370,7 +370,7 @@ ReadBase *ReadFactory(int fd, uint64_t &raw_amount) {
       break;
   }
   try {
-    AdvanceOrThrow(fd, -ReadCompressed::kMagicSize);
+    SeekOrThrow(fd, 0);
   } catch (const util::ErrnoException &e) {
     return new UncompressedWithHeader(hold.release(), header, ReadCompressed::kMagicSize);
   }
diff --git a/klm/util/scoped.cc b/klm/util/scoped.cc
new file mode 100644
index 00000000..e7066ee4
--- /dev/null
+++ b/klm/util/scoped.cc
@@ -0,0 +1,29 @@
+#include "util/scoped.hh"
+
+#include <cstdlib>
+
+namespace util {
+
+MallocException::MallocException(std::size_t requested) throw() {
+  *this << "for " << requested << " bytes ";
+}
+
+MallocException::~MallocException() throw() {}
+
+void *MallocOrThrow(std::size_t requested) {
+  void *ret;
+  UTIL_THROW_IF_ARG(!(ret = std::malloc(requested)), MallocException, (requested), "in malloc");
+  return ret;
+}
+
+scoped_malloc::~scoped_malloc() {
+  std::free(p_);
+}
+
+void scoped_malloc::call_realloc(std::size_t to) {
+  void *ret;
+  UTIL_THROW_IF_ARG(!(ret = std::realloc(p_, to)) && to, MallocException, (to), "in realloc");
+  p_ = ret;
+}
+
+} // namespace util
diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh
index d62c6df1..d0a5aabd 100644
--- a/klm/util/scoped.hh
+++ b/klm/util/scoped.hh
@@ -4,28 +4,31 @@
 
 #include "util/exception.hh"
 #include <cstddef>
-#include <cstdlib>
 
 namespace util {
 
+class MallocException : public ErrnoException {
+  public:
+    explicit MallocException(std::size_t requested) throw();
+    ~MallocException() throw();
+};
+
+void *MallocOrThrow(std::size_t requested);
+
 class scoped_malloc {
   public:
     scoped_malloc() : p_(NULL) {}
 
     scoped_malloc(void *p) : p_(p) {}
 
-    ~scoped_malloc() { std::free(p_); }
+    ~scoped_malloc();
 
     void reset(void *p = NULL) {
       scoped_malloc other(p_);
       p_ = p;
     }
 
-    void call_realloc(std::size_t to) {
-      void *ret;
-      UTIL_THROW_IF(!(ret = std::realloc(p_, to)) && to, util::ErrnoException, "realloc to " << to << " bytes failed.");
-      p_ = ret;
-    }
+    void call_realloc(std::size_t to);
 
     void *get() { return p_; }
     const void *get() const { return p_; }
diff --git a/klm/util/stream/block.hh b/klm/util/stream/block.hh
new file mode 100644
index 00000000..11aa991e
--- /dev/null
+++ b/klm/util/stream/block.hh
@@ -0,0 +1,43 @@
+#ifndef UTIL_STREAM_BLOCK__
+#define UTIL_STREAM_BLOCK__
+
+#include <cstddef>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+class Block {
+  public:
+    Block() : mem_(NULL), valid_size_(0) {}
+
+    Block(void *mem, std::size_t size) : mem_(mem), valid_size_(size) {}
+
+    void SetValidSize(std::size_t to) { valid_size_ = to; }
+    // Read might fill in less than Allocated at EOF.   
+    std::size_t ValidSize() const { return valid_size_; }
+
+    void *Get() { return mem_; }
+    const void *Get() const { return mem_; }
+
+    const void *ValidEnd() const { 
+      return reinterpret_cast<const uint8_t*>(mem_) + valid_size_;
+    }
+
+    operator bool() const { return mem_ != NULL; }
+    bool operator!() const { return mem_ == NULL; }
+ 
+  private:
+    friend class Link;
+    void SetToPoison() {
+      mem_ = NULL;
+    }
+
+    void *mem_;
+    std::size_t valid_size_;
+};
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_BLOCK__
diff --git a/klm/util/stream/chain.cc b/klm/util/stream/chain.cc
new file mode 100644
index 00000000..46708c60
--- /dev/null
+++ b/klm/util/stream/chain.cc
@@ -0,0 +1,155 @@
+#include "util/stream/chain.hh"
+
+#include "util/stream/io.hh"
+
+#include "util/exception.hh"
+#include "util/pcqueue.hh"
+
+#include <cstdlib>
+#include <new>
+#include <iostream>
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace util {
+namespace stream {
+
+ChainConfigException::ChainConfigException() throw() { *this << "Chain configured with "; }
+ChainConfigException::~ChainConfigException() throw() {}
+
+Thread::~Thread() {
+  thread_.join();
+}
+
+void Thread::UnhandledException(const std::exception &e) {
+  std::cerr << e.what() << std::endl;
+  abort();
+}
+
+void Recycler::Run(const ChainPosition &position) {
+  for (Link l(position); l; ++l) {
+    l->SetValidSize(position.GetChain().BlockSize());
+  }
+}
+
+const Recycler kRecycle = Recycler();
+
+Chain::Chain(const ChainConfig &config) : config_(config), complete_called_(false) {
+  UTIL_THROW_IF(!config.entry_size, ChainConfigException, "zero-size entries.");
+  UTIL_THROW_IF(!config.block_count, ChainConfigException, "block count zero");
+  UTIL_THROW_IF(config.total_memory < config.entry_size * config.block_count, ChainConfigException, config.total_memory << " total memory, too small for " << config.block_count << " blocks of containing entries of size " << config.entry_size);
+  // Round down block size to a multiple of entry size.   
+  block_size_ = config.total_memory / (config.block_count * config.entry_size) * config.entry_size;
+}
+
+Chain::~Chain() {
+  Wait();
+}
+
+ChainPosition Chain::Add() {
+  if (!Running()) Start();
+  PCQueue<Block> &in = queues_.back();
+  queues_.push_back(new PCQueue<Block>(config_.block_count));
+  return ChainPosition(in, queues_.back(), this, progress_);
+}
+
+Chain &Chain::operator>>(const WriteAndRecycle &writer) {
+  threads_.push_back(new Thread(Complete(), writer));
+  return *this;
+}
+
+void Chain::Wait(bool release_memory) {
+  if (queues_.empty()) {
+    assert(threads_.empty());
+    return; // Nothing to wait for.  
+  }
+  if (!complete_called_) CompleteLoop();
+  threads_.clear();
+  for (std::size_t i = 0; queues_.front().Consume(); ++i) {
+    if (i == config_.block_count) {
+      std::cerr << "Chain ending without poison." << std::endl;
+      abort();
+    }
+  }
+  queues_.clear();
+  progress_.Finished();
+  complete_called_ = false;
+  if (release_memory) memory_.reset();
+}
+
+void Chain::Start() {
+  Wait(false);
+  if (!memory_.get()) {
+    // Allocate memory.  
+    assert(threads_.empty());
+    assert(queues_.empty());
+    std::size_t malloc_size = block_size_ * config_.block_count;
+    memory_.reset(MallocOrThrow(malloc_size));
+  }
+  // This queue can accomodate all blocks.    
+  queues_.push_back(new PCQueue<Block>(config_.block_count));
+  // Populate the lead queue with blocks.  
+  uint8_t *base = static_cast<uint8_t*>(memory_.get());
+  for (std::size_t i = 0; i < config_.block_count; ++i) {
+    queues_.front().Produce(Block(base, block_size_));
+    base += block_size_;
+  }
+}
+
+ChainPosition Chain::Complete() {
+  assert(Running());
+  UTIL_THROW_IF(complete_called_, util::Exception, "CompleteLoop() called twice");
+  complete_called_ = true;
+  return ChainPosition(queues_.back(), queues_.front(), this, progress_);
+}
+
+Link::Link() : in_(NULL), out_(NULL), poisoned_(true) {}
+
+void Link::Init(const ChainPosition &position) {
+  UTIL_THROW_IF(in_, util::Exception, "Link::Init twice");
+  in_ = position.in_;
+  out_ = position.out_;
+  poisoned_ = false;
+  progress_ = position.progress_;
+  in_->Consume(current_);
+}
+
+Link::Link(const ChainPosition &position) : in_(NULL) {
+  Init(position);
+}
+
+Link::~Link() {
+  if (current_) {
+    // Probably an exception unwinding.  
+    std::cerr << "Last input should have been poison." << std::endl;
+//    abort();
+  } else {
+    if (!poisoned_) {
+      // Pass the poison!
+      out_->Produce(current_);
+    }
+  }
+}
+
+Link &Link::operator++() {
+  assert(current_);
+  progress_ += current_.ValidSize();
+  out_->Produce(current_);
+  in_->Consume(current_);
+  if (!current_) {
+    poisoned_ = true;
+    out_->Produce(current_);
+  }
+  return *this;
+}
+
+void Link::Poison() {
+  assert(!poisoned_);
+  current_.SetToPoison();
+  out_->Produce(current_);
+  poisoned_ = true;
+}
+
+} // namespace stream
+} // namespace util
diff --git a/klm/util/stream/chain.hh b/klm/util/stream/chain.hh
new file mode 100644
index 00000000..154b9b33
--- /dev/null
+++ b/klm/util/stream/chain.hh
@@ -0,0 +1,198 @@
+#ifndef UTIL_STREAM_CHAIN__
+#define UTIL_STREAM_CHAIN__
+
+#include "util/stream/block.hh"
+#include "util/stream/config.hh"
+#include "util/stream/multi_progress.hh"
+#include "util/scoped.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/thread/thread.hpp>
+
+#include <cstddef>
+
+#include <assert.h>
+
+namespace util {
+template <class T> class PCQueue;
+namespace stream {
+
+class ChainConfigException : public Exception {
+  public:
+    ChainConfigException() throw();
+    ~ChainConfigException() throw();
+};
+
+class Chain;
+// Specifies position in chain for Link constructor.
+class ChainPosition {
+  public:
+    const Chain &GetChain() const { return *chain_; }
+  private:
+    friend class Chain;
+    friend class Link;
+    ChainPosition(PCQueue<Block> &in, PCQueue<Block> &out, Chain *chain, MultiProgress &progress) 
+      : in_(&in), out_(&out), chain_(chain), progress_(progress.Add()) {}
+
+    PCQueue<Block> *in_, *out_;
+
+    Chain *chain_;
+
+    WorkerProgress progress_;
+};
+
+// Position is usually ChainPosition but if there are multiple streams involved, this can be ChainPositions.  
+class Thread {
+  public:
+    template <class Position, class Worker> Thread(const Position &position, const Worker &worker)
+      : thread_(boost::ref(*this), position, worker) {}
+
+    ~Thread();
+
+    template <class Position, class Worker> void operator()(const Position &position, Worker &worker) {
+      try {
+        worker.Run(position);
+      } catch (const std::exception &e) {
+        UnhandledException(e);
+      }
+    }
+
+  private:
+    void UnhandledException(const std::exception &e);
+
+    boost::thread thread_;
+};
+
+class Recycler {
+  public:
+    void Run(const ChainPosition &position);
+};
+
+extern const Recycler kRecycle;
+class WriteAndRecycle;
+
+class Chain {
+  private:
+    template <class T, void (T::*ptr)(const ChainPosition &) = &T::Run> struct CheckForRun {
+      typedef Chain type;
+    };
+
+  public:
+    explicit Chain(const ChainConfig &config);
+
+    ~Chain();
+
+    void ActivateProgress() {
+      assert(!Running());
+      progress_.Activate();
+    }
+
+    void SetProgressTarget(uint64_t target) {
+      progress_.SetTarget(target);
+    }
+
+    std::size_t EntrySize() const {
+      return config_.entry_size;
+    }
+    std::size_t BlockSize() const {
+      return block_size_;
+    }
+
+    // Two ways to add to the chain: Add() or operator>>.  
+    ChainPosition Add();
+
+    // This is for adding threaded workers with a Run method.  
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const Worker &worker) {
+      assert(!complete_called_);
+      threads_.push_back(new Thread(Add(), worker));
+      return *this;
+    }
+
+    // Avoid copying the worker.  
+    template <class Worker> typename CheckForRun<Worker>::type &operator>>(const boost::reference_wrapper<Worker> &worker) {
+      assert(!complete_called_);
+      threads_.push_back(new Thread(Add(), worker));
+      return *this;
+    }
+
+    // Note that Link and Stream also define operator>> outside this class.  
+
+    // To complete the loop, call CompleteLoop(), >> kRecycle, or the destructor.  
+    void CompleteLoop() {
+      threads_.push_back(new Thread(Complete(), kRecycle));
+    }
+
+    Chain &operator>>(const Recycler &recycle) {
+      CompleteLoop();
+      return *this;
+    }
+
+    Chain &operator>>(const WriteAndRecycle &writer);
+
+    // Chains are reusable.  Call Wait to wait for everything to finish and free memory.  
+    void Wait(bool release_memory = true);
+
+    // Waits for the current chain to complete (if any) then starts again.  
+    void Start();
+
+    bool Running() const { return !queues_.empty(); }
+
+  private:
+    ChainPosition Complete();
+
+    ChainConfig config_;
+
+    std::size_t block_size_;
+
+    scoped_malloc memory_;
+
+    boost::ptr_vector<PCQueue<Block> > queues_;
+
+    bool complete_called_;
+
+    boost::ptr_vector<Thread> threads_;
+
+    MultiProgress progress_;
+};
+
+// Create the link in the worker thread using the position token.
+class Link {
+  public:
+    // Either default construct and Init or just construct all at once.
+    Link();
+    void Init(const ChainPosition &position);
+
+    explicit Link(const ChainPosition &position);
+
+    ~Link();
+
+    Block &operator*() { return current_; }
+    const Block &operator*() const { return current_; }
+
+    Block *operator->() { return &current_; }
+    const Block *operator->() const { return &current_; }
+
+    Link &operator++();
+
+    operator bool() const { return current_; }
+
+    void Poison();
+
+  private:
+    Block current_;
+    PCQueue<Block> *in_, *out_;
+ 
+    bool poisoned_;
+
+    WorkerProgress progress_;
+};
+
+inline Chain &operator>>(Chain &chain, Link &link) {
+  link.Init(chain.Add());
+  return chain;
+}
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_CHAIN__
diff --git a/klm/util/stream/config.hh b/klm/util/stream/config.hh
new file mode 100644
index 00000000..1eeb3a8a
--- /dev/null
+++ b/klm/util/stream/config.hh
@@ -0,0 +1,32 @@
+#ifndef UTIL_STREAM_CONFIG__
+#define UTIL_STREAM_CONFIG__
+
+#include <cstddef>
+#include <string>
+
+namespace util { namespace stream {
+
+struct ChainConfig {
+  ChainConfig() {}
+
+  ChainConfig(std::size_t in_entry_size, std::size_t in_block_count, std::size_t in_total_memory)
+    : entry_size(in_entry_size), block_count(in_block_count), total_memory(in_total_memory) {}
+
+  std::size_t entry_size;
+  std::size_t block_count;
+  // Chain's constructor will make this a multiple of entry_size. 
+  std::size_t total_memory;
+};
+
+struct SortConfig {
+  std::string temp_prefix;
+
+  // Size of each input/output buffer.
+  std::size_t buffer_size;
+
+  // Total memory to use when running alone.
+  std::size_t total_memory;
+};
+
+}} // namespaces
+#endif // UTIL_STREAM_CONFIG__
diff --git a/klm/util/stream/io.cc b/klm/util/stream/io.cc
new file mode 100644
index 00000000..c7ad2980
--- /dev/null
+++ b/klm/util/stream/io.cc
@@ -0,0 +1,64 @@
+#include "util/stream/io.hh"
+
+#include "util/file.hh"
+#include "util/stream/chain.hh"
+
+#include <cstddef>
+
+namespace util {
+namespace stream {
+
+ReadSizeException::ReadSizeException() throw() {}
+ReadSizeException::~ReadSizeException() throw() {}
+
+void Read::Run(const ChainPosition &position) {
+  const std::size_t block_size = position.GetChain().BlockSize();
+  const std::size_t entry_size = position.GetChain().EntrySize();
+  for (Link link(position); link; ++link) {
+    std::size_t got = util::ReadOrEOF(file_, link->Get(), block_size);
+    UTIL_THROW_IF(got % entry_size, ReadSizeException, "File ended with " << got << " bytes, not a multiple of " << entry_size << "."); 
+    if (got == 0) {
+      link.Poison();
+      return;
+    } else {
+      link->SetValidSize(got);
+    }
+  }
+}
+
+void PRead::Run(const ChainPosition &position) {
+  scoped_fd owner;
+  if (own_) owner.reset(file_);
+  uint64_t size = SizeOrThrow(file_);
+  UTIL_THROW_IF(size % static_cast<uint64_t>(position.GetChain().EntrySize()), ReadSizeException, "File size " << file_ << " size is " << size << " not a multiple of " << position.GetChain().EntrySize());
+  std::size_t block_size = position.GetChain().BlockSize();
+  Link link(position);
+  uint64_t offset = 0;
+  for (; offset + block_size < size; offset += block_size, ++link) {
+    PReadOrThrow(file_, link->Get(), block_size, offset);
+    link->SetValidSize(block_size);
+  }
+  if (size - offset) {
+    PReadOrThrow(file_, link->Get(), size - offset, offset);
+    link->SetValidSize(size - offset);
+    ++link;
+  }
+  link.Poison();
+}
+
+void Write::Run(const ChainPosition &position) {
+  for (Link link(position); link; ++link) {
+    WriteOrThrow(file_, link->Get(), link->ValidSize());
+  }
+}
+
+void WriteAndRecycle::Run(const ChainPosition &position) {
+  const std::size_t block_size = position.GetChain().BlockSize();
+  for (Link link(position); link; ++link) {
+    WriteOrThrow(file_, link->Get(), link->ValidSize());
+    link->SetValidSize(block_size);
+  }
+}
+
+} // namespace stream
+} // namespace util
diff --git a/klm/util/stream/io.hh b/klm/util/stream/io.hh
new file mode 100644
index 00000000..934b6b3f
--- /dev/null
+++ b/klm/util/stream/io.hh
@@ -0,0 +1,76 @@
+#ifndef UTIL_STREAM_IO__
+#define UTIL_STREAM_IO__
+
+#include "util/exception.hh"
+#include "util/file.hh"
+
+namespace util {
+namespace stream {
+
+class ChainPosition;
+
+class ReadSizeException : public util::Exception {
+  public:
+    ReadSizeException() throw();
+    ~ReadSizeException() throw();
+};
+
+class Read {
+  public:
+    explicit Read(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position); 
+  private:
+    int file_;
+};
+
+// Like read but uses pread so that the file can be accessed from multiple threads.  
+class PRead {
+  public:
+    explicit PRead(int fd, bool take_own = false) : file_(fd), own_(take_own) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+    bool own_;
+};
+
+class Write {
+  public:
+    explicit Write(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+};
+
+class WriteAndRecycle {
+  public:
+    explicit WriteAndRecycle(int fd) : file_(fd) {}
+    void Run(const ChainPosition &position);
+  private:
+    int file_;
+};
+
+// Reuse the same file over and over again to buffer output.  
+class FileBuffer {
+  public:
+    explicit FileBuffer(int fd) : file_(fd) {}
+
+    WriteAndRecycle Sink() const {
+      util::SeekOrThrow(file_.get(), 0);
+      return WriteAndRecycle(file_.get());
+    }
+
+    PRead Source() const {
+      return PRead(file_.get());
+    }
+
+    uint64_t Size() const {
+      return SizeOrThrow(file_.get());
+    }
+
+  private:
+    scoped_fd file_;
+};
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_IO__
diff --git a/klm/util/stream/io_test.cc b/klm/util/stream/io_test.cc
new file mode 100644
index 00000000..82108335
--- /dev/null
+++ b/klm/util/stream/io_test.cc
@@ -0,0 +1,38 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/chain.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE IOTest
+#include <boost/test/unit_test.hpp>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+BOOST_AUTO_TEST_CASE(CopyFile) {
+  std::string temps("io_test_temp");
+
+  scoped_fd in(MakeTemp(temps));
+  for (uint64_t i = 0; i < 100000; ++i) {
+    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+  }
+  SeekOrThrow(in.get(), 0);
+  scoped_fd out(MakeTemp(temps));
+
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 1024;
+  config.block_count = 10;
+
+  Chain(config) >> PRead(in.get()) >> Write(out.get());
+
+  SeekOrThrow(out.get(), 0);
+  for (uint64_t i = 0; i < 100000; ++i) {
+    uint64_t got;
+    ReadOrThrow(out.get(), &got, sizeof(uint64_t));
+    BOOST_CHECK_EQUAL(i, got);
+  }
+}
+
+}}} // namespaces
diff --git a/klm/util/stream/line_input.cc b/klm/util/stream/line_input.cc
new file mode 100644
index 00000000..dafa5020
--- /dev/null
+++ b/klm/util/stream/line_input.cc
@@ -0,0 +1,52 @@
+#include "util/stream/line_input.hh"
+
+#include "util/exception.hh"
+#include "util/file.hh"
+#include "util/read_compressed.hh"
+#include "util/stream/chain.hh"
+
+#include <algorithm>
+#include <vector>
+
+namespace util { namespace stream {
+
+void LineInput::Run(const ChainPosition &position) {
+  ReadCompressed reader(fd_);
+  // Holding area for beginning of line to be placed in next block.
+  std::vector<char> carry;
+  
+  for (Link block(position); ; ++block) {
+    char *to = static_cast<char*>(block->Get());
+    char *begin = to;
+    char *end = to + position.GetChain().BlockSize();
+    std::copy(carry.begin(), carry.end(), to);
+    to += carry.size();
+    while (to != end) {
+      std::size_t got = reader.Read(to, end - to);
+      if (!got) {
+        // EOF
+        block->SetValidSize(to - begin);
+        ++block;
+        block.Poison();
+        return;
+      }
+      to += got;
+    }
+
+    // Find the last newline.
+    char *newline;
+    for (newline = to - 1; ; --newline) {
+      UTIL_THROW_IF(newline < begin, Exception, "Did not find a newline in " << position.GetChain().BlockSize() << " bytes of input of " << NameFromFD(fd_) << ".  Is this a text file?");
+      if (*newline == '\n') break;
+    }
+    
+    // Copy everything after the last newline to the carry.
+    carry.clear();
+    carry.resize(to - (newline + 1));
+    std::copy(newline + 1, to, &*carry.begin());
+
+    block->SetValidSize(newline + 1 - begin);
+  }
+}
+
+}} // namespaces
diff --git a/klm/util/stream/line_input.hh b/klm/util/stream/line_input.hh
new file mode 100644
index 00000000..86db1dd0
--- /dev/null
+++ b/klm/util/stream/line_input.hh
@@ -0,0 +1,22 @@
+#ifndef UTIL_STREAM_LINE_INPUT__
+#define UTIL_STREAM_LINE_INPUT__
+namespace util {namespace stream {
+
+class ChainPosition;
+
+/* Worker that reads input into blocks, ensuring that blocks contain whole
+ * lines.  Assumes that the maximum size of a line is less than the block size
+ */
+class LineInput {
+  public:
+    // Takes ownership upon thread execution.
+    explicit LineInput(int fd);
+
+    void Run(const ChainPosition &position);
+
+  private:
+    int fd_;
+};
+
+}} // namespaces
+#endif // UTIL_STREAM_LINE_INPUT__
diff --git a/klm/util/stream/multi_progress.cc b/klm/util/stream/multi_progress.cc
new file mode 100644
index 00000000..8ba10386
--- /dev/null
+++ b/klm/util/stream/multi_progress.cc
@@ -0,0 +1,86 @@
+#include "util/stream/multi_progress.hh"
+
+// TODO: merge some functionality with the simple progress bar?
+#include "util/ersatz_progress.hh"
+
+#include <iostream>
+#include <limits>
+
+#include <string.h>
+
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <unistd.h>
+#endif
+
+namespace util { namespace stream {
+
+namespace {
+const char kDisplayCharacters[] = "-+*#0123456789";
+
+uint64_t Next(unsigned char stone, uint64_t complete) {
+  return (static_cast<uint64_t>(stone + 1) * complete + MultiProgress::kWidth - 1) / MultiProgress::kWidth;
+}
+
+} // namespace
+
+MultiProgress::MultiProgress() : active_(false), complete_(std::numeric_limits<uint64_t>::max()), character_handout_(0) {}
+
+MultiProgress::~MultiProgress() {
+  if (active_ && complete_ != std::numeric_limits<uint64_t>::max())
+    std::cerr << '\n';
+}
+
+void MultiProgress::Activate() {
+  active_ = 
+#if !defined(_WIN32) && !defined(_WIN64)
+    // Is stderr a terminal?  
+    (isatty(2) == 1)
+#else
+    true
+#endif
+    ;
+}
+
+void MultiProgress::SetTarget(uint64_t complete) {
+  if (!active_) return;
+  complete_ = complete;
+  if (!complete) complete_ = 1;
+  memset(display_, 0, sizeof(display_));
+  character_handout_ = 0;
+  std::cerr << kProgressBanner;
+}
+
+WorkerProgress MultiProgress::Add() {
+  if (!active_)
+    return WorkerProgress(std::numeric_limits<uint64_t>::max(), *this, '\0');
+  std::size_t character_index;
+  {
+    boost::unique_lock<boost::mutex> lock(mutex_);
+    character_index = character_handout_++;
+    if (character_handout_ == sizeof(kDisplayCharacters) - 1)
+      character_handout_ = 0;
+  }
+  return WorkerProgress(Next(0, complete_), *this, kDisplayCharacters[character_index]);
+}
+
+void MultiProgress::Finished() {
+  if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+  std::cerr << '\n';
+  complete_ = std::numeric_limits<uint64_t>::max();
+}
+
+void MultiProgress::Milestone(WorkerProgress &worker) {
+  if (!active_ || complete_ == std::numeric_limits<uint64_t>::max()) return;
+  unsigned char stone = std::min(static_cast<uint64_t>(kWidth), worker.current_ * kWidth / complete_);
+  for (char *i = &display_[worker.stone_]; i < &display_[stone]; ++i) {
+    *i = worker.character_;
+  }
+  worker.next_ = Next(stone, complete_);
+  worker.stone_ = stone;
+  {
+    boost::unique_lock<boost::mutex> lock(mutex_);
+    std::cerr << '\r' << display_ << std::flush;
+  }
+}
+
+}} // namespaces
diff --git a/klm/util/stream/multi_progress.hh b/klm/util/stream/multi_progress.hh
new file mode 100644
index 00000000..c4dd45a9
--- /dev/null
+++ b/klm/util/stream/multi_progress.hh
@@ -0,0 +1,90 @@
+/* Progress bar suitable for chains of workers */
+#ifndef UTIL_MULTI_PROGRESS__
+#define UTIL_MULTI_PROGRESS__
+
+#include <boost/thread/mutex.hpp>
+
+#include <cstddef>
+
+#include <stdint.h>
+
+namespace util { namespace stream {
+
+class WorkerProgress;
+
+class MultiProgress {
+  public:
+    static const unsigned char kWidth = 100;
+
+    MultiProgress();
+
+    ~MultiProgress();
+
+    // Turns on showing (requires SetTarget too).
+    void Activate();
+
+    void SetTarget(uint64_t complete);
+
+    WorkerProgress Add();
+
+    void Finished();
+
+  private:
+    friend class WorkerProgress;
+    void Milestone(WorkerProgress &worker);
+
+    bool active_;
+
+    uint64_t complete_;
+
+    boost::mutex mutex_;
+
+    // \0 at the end.  
+    char display_[kWidth + 1];
+
+    std::size_t character_handout_;
+
+    MultiProgress(const MultiProgress &);
+    MultiProgress &operator=(const MultiProgress &);
+};
+
+class WorkerProgress {
+  public:
+    // Default contrutor must be initialized with operator= later.  
+    WorkerProgress() : parent_(NULL) {}
+
+    // Not threadsafe for the same worker by default.  
+    WorkerProgress &operator++() {
+      if (++current_ >= next_) {
+        parent_->Milestone(*this);
+      }
+      return *this;
+    }
+
+    WorkerProgress &operator+=(uint64_t amount) {
+      current_ += amount;
+      if (current_ >= next_) {
+        parent_->Milestone(*this);
+      }
+      return *this;
+    }
+
+  private:
+    friend class MultiProgress;
+    WorkerProgress(uint64_t next, MultiProgress &parent, char character) 
+      : current_(0), next_(next), parent_(&parent), stone_(0), character_(character) {}
+
+    uint64_t current_, next_;
+
+    MultiProgress *parent_;
+
+    // Previous milestone reached.  
+    unsigned char stone_;
+
+    // Character to display in bar.  
+    char character_;
+};
+
+}} // namespaces
+
+#endif // UTIL_MULTI_PROGRESS__
diff --git a/klm/util/stream/sort.hh b/klm/util/stream/sort.hh
new file mode 100644
index 00000000..be6c11ea
--- /dev/null
+++ b/klm/util/stream/sort.hh
@@ -0,0 +1,542 @@
+/* Usage:
+ * Sort<Compare> sorter(temp, compare);
+ * Chain(config) >> Read(file) >> sorter.Unsorted();
+ * Stream stream;
+ * Chain chain(config) >> sorter.Sorted(internal_config, lazy_config) >> stream;
+ * 
+ * Note that sorter must outlive any threads that use Unsorted or Sorted.  
+ *
+ * Combiners take the form:
+ * bool operator()(void *into, const void *option, const Compare &compare) const
+ * which returns true iff a combination happened.  The sorting algorithm
+ * guarantees compare(into, option).  But it does not guarantee 
+ * compare(option, into).  
+ * Currently, combining is only done in merge steps, not during on-the-fly
+ * sort.  Use a hash table for that.  
+ */
+
+#ifndef UTIL_STREAM_SORT__
+#define UTIL_STREAM_SORT__
+
+#include "util/stream/chain.hh"
+#include "util/stream/config.hh"
+#include "util/stream/io.hh"
+#include "util/stream/stream.hh"
+#include "util/stream/timer.hh"
+
+#include "util/file.hh"
+#include "util/scoped.hh"
+#include "util/sized_iterator.hh"
+
+#include <algorithm>
+#include <iostream>
+#include <queue>
+#include <string>
+
+namespace util {
+namespace stream {
+
+struct NeverCombine {
+  template <class Compare> bool operator()(const void *, const void *, const Compare &) const { 
+    return false;
+  }
+};
+
+// Manage the offsets of sorted blocks in a file.  
+class Offsets {
+  public:
+    explicit Offsets(int fd) : log_(fd) {
+      Reset();
+    }
+
+    int File() const { return log_; }
+
+    void Append(uint64_t length) {
+      if (!length) return;
+      ++block_count_;
+      if (length == cur_.length) {
+        ++cur_.run;
+        return;
+      }
+      WriteOrThrow(log_, &cur_, sizeof(Entry));
+      cur_.length = length;
+      cur_.run = 1;
+    }
+
+    void FinishedAppending() {
+      WriteOrThrow(log_, &cur_, sizeof(Entry));
+      SeekOrThrow(log_, sizeof(Entry)); // Skip 0,0 at beginning.
+      cur_.run = 0;
+      if (block_count_) {
+        ReadOrThrow(log_, &cur_, sizeof(Entry));
+        assert(cur_.length);
+        assert(cur_.run);
+      }
+    }
+
+    uint64_t RemainingBlocks() const { return block_count_; }
+
+    uint64_t TotalOffset() const { return output_sum_; }
+
+    uint64_t PeekSize() const {
+      return cur_.length;
+    }
+
+    uint64_t NextSize() {
+      assert(block_count_);
+      uint64_t ret = cur_.length;
+      output_sum_ += ret;
+
+      --cur_.run;
+      --block_count_;
+      if (!cur_.run && block_count_) {
+        ReadOrThrow(log_, &cur_, sizeof(Entry));
+        assert(cur_.length);
+        assert(cur_.run);
+      }
+      return ret;
+    }
+
+    void Reset() {
+      SeekOrThrow(log_, 0);
+      ResizeOrThrow(log_, 0);
+      cur_.length = 0;
+      cur_.run = 0;
+      block_count_ = 0;
+      output_sum_ = 0;
+    }
+
+  private:
+    int log_;
+
+    struct Entry {
+      uint64_t length;
+      uint64_t run;
+    };
+    Entry cur_;
+
+    uint64_t block_count_;
+
+    uint64_t output_sum_;
+};
+
+// A priority queue of entries backed by file buffers
+template <class Compare> class MergeQueue {
+  public:
+    MergeQueue(int fd, std::size_t buffer_size, std::size_t entry_size, const Compare &compare)
+      : queue_(Greater(compare)), in_(fd), buffer_size_(buffer_size), entry_size_(entry_size) {}
+
+    void Push(void *base, uint64_t offset, uint64_t amount) {
+      queue_.push(Entry(base, in_, offset, amount, buffer_size_));
+    }
+
+    const void *Top() const {
+      return queue_.top().Current();
+    }
+
+    void Pop() {
+      Entry top(queue_.top());
+      queue_.pop();
+      if (top.Increment(in_, buffer_size_, entry_size_))
+        queue_.push(top);
+    }
+
+    std::size_t Size() const {
+      return queue_.size();
+    }
+
+    bool Empty() const {
+      return queue_.empty();
+    }
+
+  private:
+    // Priority queue contains these entries.  
+    class Entry {
+      public:
+        Entry() {}
+
+        Entry(void *base, int fd, uint64_t offset, uint64_t amount, std::size_t buf_size) {
+          offset_ = offset;
+          remaining_ = amount;
+          buffer_end_ = static_cast<uint8_t*>(base) + buf_size;
+          Read(fd, buf_size);
+        }
+
+        bool Increment(int fd, std::size_t buf_size, std::size_t entry_size) {
+          current_ += entry_size;
+          if (current_ != buffer_end_) return true;
+          return Read(fd, buf_size);
+        }
+
+        const void *Current() const { return current_; }
+
+      private:
+        bool Read(int fd, std::size_t buf_size) {
+          current_ = buffer_end_ - buf_size;
+          std::size_t amount;
+          if (static_cast<uint64_t>(buf_size) < remaining_) {
+            amount = buf_size;
+          } else if (!remaining_) {
+            return false;
+          } else {
+            amount = remaining_;
+            buffer_end_ = current_ + remaining_;
+          }
+          PReadOrThrow(fd, current_, amount, offset_);
+          offset_ += amount;
+          assert(current_ <= buffer_end_);
+          remaining_ -= amount;
+          return true;
+        }
+
+        // Buffer
+        uint8_t *current_, *buffer_end_;
+        // File
+        uint64_t remaining_, offset_;
+    };
+
+    // Wrapper comparison function for queue entries.   
+    class Greater : public std::binary_function<const Entry &, const Entry &, bool> {
+      public:
+        explicit Greater(const Compare &compare) : compare_(compare) {}
+
+        bool operator()(const Entry &first, const Entry &second) const {
+          return compare_(second.Current(), first.Current());
+        }
+
+      private:
+        const Compare compare_;
+    };
+
+    typedef std::priority_queue<Entry, std::vector<Entry>, Greater> Queue;
+    Queue queue_;
+
+    const int in_;
+    const std::size_t buffer_size_;
+    const std::size_t entry_size_;
+};
+
+/* A worker object that merges.  If the number of pieces to merge exceeds the
+ * arity, it outputs multiple sorted blocks, recording to out_offsets.  
+ * However, users will only every see a single sorted block out output because
+ * Sort::Sorted insures the arity is higher than the number of pieces before
+ * returning this.   
+ */
+template <class Compare, class Combine> class MergingReader {
+  public:
+    MergingReader(int in, Offsets *in_offsets, Offsets *out_offsets, std::size_t buffer_size, std::size_t total_memory, const Compare &compare, const Combine &combine) :
+        compare_(compare), combine_(combine),
+        in_(in),
+        in_offsets_(in_offsets), out_offsets_(out_offsets),
+        buffer_size_(buffer_size), total_memory_(total_memory) {}
+
+    void Run(const ChainPosition &position) {
+      Run(position, false);
+    }
+
+    void Run(const ChainPosition &position, bool assert_one) {
+      // Special case: nothing to read.  
+      if (!in_offsets_->RemainingBlocks()) {
+        Link l(position);
+        l.Poison();
+        return;
+      }
+      // If there's just one entry, just read.
+      if (in_offsets_->RemainingBlocks() == 1) {
+        // Sequencing is important.
+        uint64_t offset = in_offsets_->TotalOffset();
+        uint64_t amount = in_offsets_->NextSize();
+        ReadSingle(offset, amount, position);
+        if (out_offsets_) out_offsets_->Append(amount);
+        return;
+      }
+
+      Stream str(position);
+      scoped_malloc buffer(MallocOrThrow(total_memory_));
+      uint8_t *const buffer_end = static_cast<uint8_t*>(buffer.get()) + total_memory_;
+
+      const std::size_t entry_size = position.GetChain().EntrySize();
+
+      while (in_offsets_->RemainingBlocks()) {
+        // Use bigger buffers if there's less remaining.
+        uint64_t per_buffer = std::max(buffer_size_, total_memory_ / in_offsets_->RemainingBlocks());
+        per_buffer -= per_buffer % entry_size;
+        assert(per_buffer);
+
+        // Populate queue.
+        MergeQueue<Compare> queue(in_, per_buffer, entry_size, compare_);
+        for (uint8_t *buf = static_cast<uint8_t*>(buffer.get()); 
+            in_offsets_->RemainingBlocks() && (buf + std::min(per_buffer, in_offsets_->PeekSize()) <= buffer_end);) {
+          uint64_t offset = in_offsets_->TotalOffset();
+          uint64_t size = in_offsets_->NextSize();
+          queue.Push(buf, offset, size);
+          buf += static_cast<std::size_t>(std::min<uint64_t>(size, per_buffer));
+        }
+        // This shouldn't happen but it's probably better to die than loop indefinitely.
+        if (queue.Size() < 2 && in_offsets_->RemainingBlocks()) {
+          std::cerr << "Bug in sort implementation: not merging at least two stripes." << std::endl;
+          abort();
+        }
+        if (assert_one && in_offsets_->RemainingBlocks()) {
+          std::cerr << "Bug in sort implementation: should only be one merge group for lazy sort" << std::endl;
+          abort();
+        }
+
+        uint64_t written = 0;
+        // Merge including combiner support.  
+        memcpy(str.Get(), queue.Top(), entry_size);
+        for (queue.Pop(); !queue.Empty(); queue.Pop()) {
+          if (!combine_(str.Get(), queue.Top(), compare_)) {
+            ++written; ++str;
+            memcpy(str.Get(), queue.Top(), entry_size);
+          }
+        }
+        ++written; ++str;
+        if (out_offsets_)
+          out_offsets_->Append(written * entry_size);
+      }
+      str.Poison();
+    }
+
+  private: 
+    void ReadSingle(uint64_t offset, const uint64_t size, const ChainPosition &position) {
+      // Special case: only one to read.  
+      const uint64_t end = offset + size;
+      const uint64_t block_size = position.GetChain().BlockSize();
+      Link l(position);
+      for (; offset + block_size < end; ++l, offset += block_size) {
+        PReadOrThrow(in_, l->Get(), block_size, offset);
+        l->SetValidSize(block_size);
+      }
+      PReadOrThrow(in_, l->Get(), end - offset, offset);
+      l->SetValidSize(end - offset);
+      (++l).Poison();
+      return;
+    }
+   
+    Compare compare_;
+    Combine combine_;
+
+    int in_;
+
+  protected:
+    Offsets *in_offsets_;
+
+  private:
+    Offsets *out_offsets_;
+ 
+    std::size_t buffer_size_;
+    std::size_t total_memory_;
+};
+
+// The lazy step owns the remaining files.  This keeps track of them.  
+template <class Compare, class Combine> class OwningMergingReader : public MergingReader<Compare, Combine> {
+  private:
+    typedef MergingReader<Compare, Combine> P;
+  public:
+    OwningMergingReader(int data, const Offsets &offsets, std::size_t buffer, std::size_t lazy, const Compare &compare, const Combine &combine) 
+      : P(data, NULL, NULL, buffer, lazy, compare, combine),
+        data_(data),
+        offsets_(offsets) {}
+
+    void Run(const ChainPosition &position) {
+      P::in_offsets_ = &offsets_;
+      scoped_fd data(data_);
+      scoped_fd offsets_file(offsets_.File());
+      P::Run(position, true);
+    }
+
+  private:
+    int data_;
+    Offsets offsets_;
+};
+
+// Don't use this directly.  Worker that sorts blocks.   
+template <class Compare> class BlockSorter {
+  public:
+    BlockSorter(Offsets &offsets, const Compare &compare) :
+      offsets_(&offsets), compare_(compare) {}
+
+    void Run(const ChainPosition &position) {
+      const std::size_t entry_size = position.GetChain().EntrySize();
+      for (Link link(position); link; ++link) {
+        // Record the size of each block in a separate file.    
+        offsets_->Append(link->ValidSize());
+        void *end = static_cast<uint8_t*>(link->Get()) + link->ValidSize();
+        std::sort(
+            SizedIt(link->Get(), entry_size),
+            SizedIt(end, entry_size),
+            compare_);
+      }
+      offsets_->FinishedAppending();
+    }
+
+  private:
+    Offsets *offsets_;
+    SizedCompare<Compare> compare_;
+};
+
+class BadSortConfig : public Exception {
+  public:
+    BadSortConfig() throw() {}
+    ~BadSortConfig() throw() {}
+};
+
+template <class Compare, class Combine = NeverCombine> class Sort {
+  public:
+    Sort(Chain &in, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = Combine())
+      : config_(config),
+        data_(MakeTemp(config.temp_prefix)),
+        offsets_file_(MakeTemp(config.temp_prefix)), offsets_(offsets_file_.get()),
+        compare_(compare), combine_(combine),
+        entry_size_(in.EntrySize()) {
+      UTIL_THROW_IF(!entry_size_, BadSortConfig, "Sorting entries of size 0");
+      // Make buffer_size a multiple of the entry_size.  
+      config_.buffer_size -= config_.buffer_size % entry_size_;
+      UTIL_THROW_IF(!config_.buffer_size, BadSortConfig, "Sort buffer too small");
+      UTIL_THROW_IF(config_.total_memory < config_.buffer_size * 4, BadSortConfig, "Sorting memory " << config_.total_memory << " is too small for four buffers (two read and two write).");
+      in >> BlockSorter<Compare>(offsets_, compare_) >> WriteAndRecycle(data_.get());
+    }
+
+    uint64_t Size() const {
+      return SizeOrThrow(data_.get());
+    }
+
+    // Do merge sort, terminating when lazy merge could be done with the
+    // specified memory.  Return the minimum memory necessary to do lazy merge.
+    std::size_t Merge(std::size_t lazy_memory) {
+      if (offsets_.RemainingBlocks() <= 1) return 0;
+      const uint64_t lazy_arity = std::max<uint64_t>(1, lazy_memory / config_.buffer_size);
+      uint64_t size = Size();
+      /* No overflow because
+       * offsets_.RemainingBlocks() * config_.buffer_size <= lazy_memory ||
+       * size < lazy_memory
+       */
+      if (offsets_.RemainingBlocks() <= lazy_arity || size <= static_cast<uint64_t>(lazy_memory))
+        return std::min<std::size_t>(size, offsets_.RemainingBlocks() * config_.buffer_size);
+
+      scoped_fd data2(MakeTemp(config_.temp_prefix));
+      int fd_in = data_.get(), fd_out = data2.get();
+      scoped_fd offsets2_file(MakeTemp(config_.temp_prefix));
+      Offsets offsets2(offsets2_file.get());
+      Offsets *offsets_in = &offsets_, *offsets_out = &offsets2;
+
+      // Double buffered writing.  
+      ChainConfig chain_config;
+      chain_config.entry_size = entry_size_;
+      chain_config.block_count = 2;
+      chain_config.total_memory = config_.buffer_size * 2;
+      Chain chain(chain_config);
+
+      while (offsets_in->RemainingBlocks() > lazy_arity) {
+        if (size <= static_cast<uint64_t>(lazy_memory)) break;
+        std::size_t reading_memory = config_.total_memory - 2 * config_.buffer_size;
+        if (size < static_cast<uint64_t>(reading_memory)) {
+          reading_memory = static_cast<std::size_t>(size);
+        }
+        SeekOrThrow(fd_in, 0);
+        chain >>
+          MergingReader<Compare, Combine>(
+              fd_in,
+              offsets_in, offsets_out,
+              config_.buffer_size,
+              reading_memory,
+              compare_, combine_) >>
+          WriteAndRecycle(fd_out);
+        chain.Wait();
+        offsets_out->FinishedAppending();
+        ResizeOrThrow(fd_in, 0);
+        offsets_in->Reset();
+        std::swap(fd_in, fd_out);
+        std::swap(offsets_in, offsets_out);
+        size = SizeOrThrow(fd_in);
+      }
+
+      SeekOrThrow(fd_in, 0);
+      if (fd_in == data2.get()) {
+        data_.reset(data2.release());
+        offsets_file_.reset(offsets2_file.release());
+        offsets_ = offsets2;
+      }
+      if (offsets_.RemainingBlocks() <= 1) return 0;
+      // No overflow because the while loop exited.
+      return std::min(size, offsets_.RemainingBlocks() * static_cast<uint64_t>(config_.buffer_size));
+    }
+
+    // Output to chain, using this amount of memory, maximum, for lazy merge
+    // sort.  
+    void Output(Chain &out, std::size_t lazy_memory) {
+      Merge(lazy_memory);
+      out.SetProgressTarget(Size());
+      out >> OwningMergingReader<Compare, Combine>(data_.get(), offsets_, config_.buffer_size, lazy_memory, compare_, combine_);
+      data_.release();
+      offsets_file_.release();
+    }
+
+    /* If a pipeline step is reading sorted input and writing to a different
+     * sort order, then there's a trade-off between using RAM to read lazily
+     * (avoiding copying the file) and using RAM to increase block size and, 
+     * therefore, decrease the number of merge sort passes in the next
+     * iteration.  
+     * 
+     * Merge sort takes log_{arity}(pieces) passes.  Thus, each time the chain
+     * block size is multiplied by arity, the number of output passes decreases
+     * by one.  Up to a constant, then, log_{arity}(chain) is the number of
+     * passes saved.  Chain simply divides the memory evenly over all blocks.
+     * 
+     * Lazy sort saves this many passes (up to a constant)
+     *   log_{arity}((memory-lazy)/block_count) + 1
+     * Non-lazy sort saves this many passes (up to the same constant):
+     *   log_{arity}(memory/block_count)
+     * Add log_{arity}(block_count) to both:
+     *   log_{arity}(memory-lazy) + 1 versus log_{arity}(memory)
+     * Take arity to the power of both sizes (arity > 1)
+     *   (memory - lazy)*arity versus memory
+     * Solve for lazy
+     *   lazy = memory * (arity - 1) / arity
+     */
+    std::size_t DefaultLazy() {
+      float arity = static_cast<float>(config_.total_memory / config_.buffer_size);
+      return static_cast<std::size_t>(static_cast<float>(config_.total_memory) * (arity - 1.0) / arity);
+    }
+
+    // Same as Output with default lazy memory setting.
+    void Output(Chain &out) {
+      Output(out, DefaultLazy());
+    }
+
+    // Completely merge sort and transfer ownership to the caller.
+    int StealCompleted() {
+      // Merge all the way.
+      Merge(0);
+      SeekOrThrow(data_.get(), 0);
+      offsets_file_.reset();
+      return data_.release();
+    }
+
+  private:
+    SortConfig config_;
+
+    scoped_fd data_;
+
+    scoped_fd offsets_file_;
+    Offsets offsets_;
+
+    const Compare compare_;
+    const Combine combine_;
+    const std::size_t entry_size_;
+};
+
+// returns bytes to be read on demand.  
+template <class Compare, class Combine> uint64_t BlockingSort(Chain &chain, const SortConfig &config, const Compare &compare = Compare(), const Combine &combine = NeverCombine()) {
+  Sort<Compare, Combine> sorter(chain, config, compare, combine);
+  chain.Wait(true);
+  uint64_t size = sorter.Size();
+  sorter.Output(chain);
+  return size;
+}
+
+} // namespace stream
+} // namespace util
+
+#endif // UTIL_STREAM_SORT__
diff --git a/klm/util/stream/sort_test.cc b/klm/util/stream/sort_test.cc
new file mode 100644
index 00000000..fd7705cd
--- /dev/null
+++ b/klm/util/stream/sort_test.cc
@@ -0,0 +1,62 @@
+#include "util/stream/sort.hh"
+
+#define BOOST_TEST_MODULE SortTest
+#include <boost/test/unit_test.hpp>
+
+#include <algorithm>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+struct CompareUInt64 : public std::binary_function<const void *, const void *, bool> {
+  bool operator()(const void *first, const void *second) const {
+    return *static_cast<const uint64_t*>(first) < *reinterpret_cast<const uint64_t*>(second);
+  }
+};
+
+const uint64_t kSize = 100000;
+
+struct Putter {
+  Putter(std::vector<uint64_t> &shuffled) : shuffled_(shuffled) {}
+
+  void Run(const ChainPosition &position) {
+    Stream put_shuffled(position);
+    for (uint64_t i = 0; i < shuffled_.size(); ++i, ++put_shuffled) {
+      *static_cast<uint64_t*>(put_shuffled.Get()) = shuffled_[i];
+    }
+    put_shuffled.Poison();
+  }
+  std::vector<uint64_t> &shuffled_;
+};
+
+BOOST_AUTO_TEST_CASE(FromShuffled) {
+  std::vector<uint64_t> shuffled;
+  shuffled.reserve(kSize);
+  for (uint64_t i = 0; i < kSize; ++i) {
+    shuffled.push_back(i);
+  }
+  std::random_shuffle(shuffled.begin(), shuffled.end());
+  
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 800;
+  config.block_count = 3;
+
+  SortConfig merge_config;
+  merge_config.temp_prefix = "sort_test_temp";
+  merge_config.buffer_size = 800;
+  merge_config.total_memory = 3300;
+
+  Chain chain(config);
+  chain >> Putter(shuffled);
+  BlockingSort(chain, merge_config, CompareUInt64(), NeverCombine());
+  Stream sorted;
+  chain >> sorted >> kRecycle;
+  for (uint64_t i = 0; i < kSize; ++i, ++sorted) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(sorted.Get()));
+  }
+  BOOST_CHECK(!sorted);
+}
+
+}}} // namespaces
diff --git a/klm/util/stream/stream.hh b/klm/util/stream/stream.hh
new file mode 100644
index 00000000..6ff45b82
--- /dev/null
+++ b/klm/util/stream/stream.hh
@@ -0,0 +1,74 @@
+#ifndef UTIL_STREAM_STREAM__
+#define UTIL_STREAM_STREAM__
+
+#include "util/stream/chain.hh"
+
+#include <boost/noncopyable.hpp>
+
+#include <assert.h>
+#include <stdint.h>
+
+namespace util {
+namespace stream {
+
+class Stream : boost::noncopyable {
+  public:
+    Stream() : current_(NULL), end_(NULL) {}
+
+    void Init(const ChainPosition &position) {
+      entry_size_ = position.GetChain().EntrySize();
+      block_size_ = position.GetChain().BlockSize();
+      block_it_.Init(position);
+      StartBlock();
+    }
+
+    explicit Stream(const ChainPosition &position) {
+      Init(position);
+    }
+
+    operator bool() const { return current_ != NULL; }
+    bool operator!() const { return current_ == NULL; }
+
+    const void *Get() const { return current_; }
+    void *Get() { return current_; }
+
+    void Poison() {
+      block_it_->SetValidSize(current_ - static_cast<uint8_t*>(block_it_->Get()));
+      ++block_it_;
+      block_it_.Poison();
+    }
+    
+    Stream &operator++() {
+      assert(*this);
+      assert(current_ < end_);
+      current_ += entry_size_;
+      if (current_ == end_) {
+        ++block_it_;
+        StartBlock();
+      }
+      return *this;
+    }
+
+  private:
+    void StartBlock() {
+      for (; block_it_ && !block_it_->ValidSize(); ++block_it_) {}
+      current_ = static_cast<uint8_t*>(block_it_->Get());
+      end_ = current_ + block_it_->ValidSize();
+    }
+
+    uint8_t *current_, *end_;
+
+    std::size_t entry_size_;
+    std::size_t block_size_;
+
+    Link block_it_;
+};
+
+inline Chain &operator>>(Chain &chain, Stream &stream) {
+  stream.Init(chain.Add());
+  return chain;
+}
+
+} // namespace stream
+} // namespace util
+#endif // UTIL_STREAM_STREAM__
diff --git a/klm/util/stream/stream_test.cc b/klm/util/stream/stream_test.cc
new file mode 100644
index 00000000..6575d50d
--- /dev/null
+++ b/klm/util/stream/stream_test.cc
@@ -0,0 +1,35 @@
+#include "util/stream/io.hh"
+
+#include "util/stream/stream.hh"
+#include "util/file.hh"
+
+#define BOOST_TEST_MODULE StreamTest
+#include <boost/test/unit_test.hpp>
+
+#include <unistd.h>
+
+namespace util { namespace stream { namespace {
+
+BOOST_AUTO_TEST_CASE(StreamTest) {
+  scoped_fd in(MakeTemp("io_test_temp"));
+  for (uint64_t i = 0; i < 100000; ++i) {
+    WriteOrThrow(in.get(), &i, sizeof(uint64_t));
+  }
+  SeekOrThrow(in.get(), 0);
+
+  ChainConfig config;
+  config.entry_size = 8;
+  config.total_memory = 100;
+  config.block_count = 12;
+
+  Stream s;
+  Chain chain(config);
+  chain >> Read(in.get()) >> s >> kRecycle;
+  uint64_t i = 0;
+  for (; s; ++s, ++i) {
+    BOOST_CHECK_EQUAL(i, *static_cast<const uint64_t*>(s.Get()));
+  }
+  BOOST_CHECK_EQUAL(100000ULL, i);
+}
+
+}}} // namespaces
diff --git a/klm/util/stream/timer.hh b/klm/util/stream/timer.hh
new file mode 100644
index 00000000..50e94fe8
--- /dev/null
+++ b/klm/util/stream/timer.hh
@@ -0,0 +1,14 @@
+#ifndef UTIL_STREAM_TIMER__
+#define UTIL_STREAM_TIMER__
+
+#include <boost/version.hpp>
+
+#if BOOST_VERSION >= 104800
+#include <boost/timer/timer.hpp>
+#define UTIL_TIMER(str) boost::timer::auto_cpu_timer timer(std::cerr, 1, (str))
+#else
+//#warning Using Boost older than 1.48. Timing information will not be available.
+#define UTIL_TIMER(str) 
+#endif
+
+#endif // UTIL_STREAM_TIMER__
diff --git a/klm/util/thread_pool.hh b/klm/util/thread_pool.hh
new file mode 100644
index 00000000..84e257ea
--- /dev/null
+++ b/klm/util/thread_pool.hh
@@ -0,0 +1,95 @@
+#ifndef UTIL_THREAD_POOL__
+#define UTIL_THREAD_POOL__
+
+#include "util/pcqueue.hh"
+
+#include <boost/ptr_container/ptr_vector.hpp>
+#include <boost/optional.hpp>
+#include <boost/thread.hpp>
+
+#include <iostream>
+
+#include <stdlib.h>
+
+namespace util {
+
+template <class HandlerT> class Worker : boost::noncopyable {
+  public:
+    typedef HandlerT Handler;
+    typedef typename Handler::Request Request;
+
+    template <class Construct> Worker(PCQueue<Request> &in, Construct &construct, Request &poison)
+      : in_(in), handler_(construct), thread_(boost::ref(*this)), poison_(poison) {}
+
+    // Only call from thread.
+    void operator()() {
+      Request request;
+      while (1) {
+        in_.Consume(request);
+        if (request == poison_) return;
+        try {
+          (*handler_)(request);
+        }
+        catch(std::exception &e) {
+          std::cerr << "Handler threw " << e.what() << std::endl;
+          abort();
+        }
+        catch(...) {
+          std::cerr << "Handler threw an exception, dropping request" << std::endl;
+          abort();
+        }
+      }
+    }
+
+    void Join() {
+      thread_.join();
+    }
+
+  private:
+    PCQueue<Request> &in_;
+
+    boost::optional<Handler> handler_;
+
+    boost::thread thread_;
+
+    Request poison_;
+};
+
+template <class HandlerT> class ThreadPool : boost::noncopyable {
+  public:
+    typedef HandlerT Handler;
+    typedef typename Handler::Request Request;
+
+    template <class Construct> ThreadPool(size_t queue_length, size_t workers, Construct handler_construct, Request poison) : in_(queue_length), poison_(poison) {
+      for (size_t i = 0; i < workers; ++i) {
+        workers_.push_back(new Worker<Handler>(in_, handler_construct, poison));
+      }
+    }
+
+    ~ThreadPool() {
+      for (size_t i = 0; i < workers_.size(); ++i) {
+        Produce(poison_);
+      }
+      for (typename boost::ptr_vector<Worker<Handler> >::iterator i = workers_.begin(); i != workers_.end(); ++i) {
+        i->Join();
+      }
+    }
+
+    void Produce(const Request &request) {
+      in_.Produce(request);
+    }
+
+    // For adding to the queue.
+    PCQueue<Request> &In() { return in_; }
+
+  private:
+    PCQueue<Request> in_;
+
+    boost::ptr_vector<Worker<Handler> > workers_;
+
+    Request poison_;
+};
+
+} // namespace util
+
+#endif // UTIL_THREAD_POOL__
diff --git a/klm/util/usage.cc b/klm/util/usage.cc
index e5cf76f0..16a004bb 100644
--- a/klm/util/usage.cc
+++ b/klm/util/usage.cc
@@ -1,13 +1,17 @@
 #include "util/usage.hh"
 
+#include "util/exception.hh"
+
 #include <fstream>
 #include <ostream>
+#include <sstream>
 
 #include <string.h>
 #include <ctype.h>
 #if !defined(_WIN32) && !defined(_WIN64)
 #include <sys/resource.h>
 #include <sys/time.h>
+#include <unistd.h>
 #endif
 
 namespace util {
@@ -43,4 +47,60 @@ void PrintUsage(std::ostream &out) {
 #endif
 }
 
+uint64_t GuessPhysicalMemory() {
+#if defined(_WIN32) || defined(_WIN64)
+  return 0;
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
+  long pages = sysconf(_SC_PHYS_PAGES);
+  if (pages == -1) return 0;
+  long page_size = sysconf(_SC_PAGESIZE);
+  if (page_size == -1) return 0;
+  return static_cast<uint64_t>(pages) * static_cast<uint64_t>(page_size);
+#else
+  return 0;
+#endif
+}
+
+namespace {
+class SizeParseError : public Exception {
+  public:
+    explicit SizeParseError(const std::string &str) throw() {
+      *this << "Failed to parse " << str << " into a memory size ";
+    }
+};
+
+template <class Num> uint64_t ParseNum(const std::string &arg) {
+  std::stringstream stream(arg);
+  Num value;
+  stream >> value;
+  UTIL_THROW_IF_ARG(!stream, SizeParseError, (arg), "for the leading number.");
+  std::string after;
+  stream >> after;
+  UTIL_THROW_IF_ARG(after.size() > 1, SizeParseError, (arg), "because there are more than two characters after the number.");
+  std::string throwaway;
+  UTIL_THROW_IF_ARG(stream >> throwaway, SizeParseError, (arg), "because there was more cruft " << throwaway << " after the number.");
+
+  // Silly sort, using kilobytes as your default unit.  
+  if (after.empty()) after == "K";
+  if (after == "%") {
+    uint64_t mem = GuessPhysicalMemory();
+    UTIL_THROW_IF_ARG(!mem, SizeParseError, (arg), "because % was specified but the physical memory size could not be determined.");
+    return static_cast<double>(value) * static_cast<double>(mem) / 100.0;
+  }
+  
+  std::string units("bKMGTPEZY");
+  std::string::size_type index = units.find(after[0]);
+  UTIL_THROW_IF_ARG(index == std::string::npos, SizeParseError, (arg), "the allowed suffixes are " << units << "%.");
+  for (std::string::size_type i = 0; i < index; ++i) {
+    value *= 1024;
+  }
+  return value;
+}
+
+} // namespace
+
+uint64_t ParseSize(const std::string &arg) {
+  return arg.find('.') == std::string::npos ? ParseNum<double>(arg) : ParseNum<uint64_t>(arg);
+}
+
 } // namespace util
diff --git a/klm/util/usage.hh b/klm/util/usage.hh
index d331ff74..e19eda7b 100644
--- a/klm/util/usage.hh
+++ b/klm/util/usage.hh
@@ -1,8 +1,18 @@
 #ifndef UTIL_USAGE__
 #define UTIL_USAGE__
+#include <cstddef>
 #include <iosfwd>
+#include <string>
+
+#include <stdint.h>
 
 namespace util {
 void PrintUsage(std::ostream &to);
+
+// Determine how much physical memory there is.  Return 0 on failure.
+uint64_t GuessPhysicalMemory();
+
+// Parse a size like unix sort.  Sadly, this means the default multiplier is K.
+uint64_t ParseSize(const std::string &arg);
 } // namespace util
 #endif // UTIL_USAGE__