From dc078281c2fd83db9fc4860e8b24979ab2a1e407 Mon Sep 17 00:00:00 2001
From: Kenneth Heafield <github@kheafield.com>
Date: Thu, 16 Aug 2012 17:02:56 -0400
Subject: KenLM update.  Remove a couple of segfaults for weird input.  Other
 oddball stuff.

---
 klm/lm/bhiksha.cc        |  1 +
 klm/lm/bhiksha.hh        |  2 +-
 klm/lm/build_binary.cc   |  8 ++++++--
 klm/lm/left.hh           |  2 +-
 klm/lm/max_order.cc      |  6 ++++++
 klm/lm/max_order.hh      | 26 ++++++++++++--------------
 klm/lm/model.cc          | 11 +++++++++--
 klm/lm/model.hh          |  1 -
 klm/lm/quantize.hh       |  4 ++--
 klm/lm/read_arpa.cc      | 16 +++++++++++++++-
 klm/lm/search_trie.cc    | 20 ++++++++++----------
 klm/lm/state.hh          | 10 +++++-----
 klm/lm/trie.hh           |  2 +-
 klm/lm/trie_sort.cc      | 24 ++++++++++++++++--------
 klm/lm/trie_sort.hh      |  4 ++--
 klm/lm/value.hh          |  2 +-
 klm/lm/value_build.hh    |  6 +++---
 klm/lm/vocab.hh          |  4 ++--
 klm/util/file.cc         | 10 ++++++++++
 klm/util/file.hh         |  8 +++++++-
 klm/util/file_piece.cc   |  2 +-
 klm/util/have.hh         |  2 +-
 klm/util/mmap.cc         | 16 +---------------
 klm/util/string_piece.hh |  5 +++++
 24 files changed, 118 insertions(+), 74 deletions(-)
 create mode 100644 klm/lm/max_order.cc
diff --git a/klm/lm/bhiksha.cc b/klm/lm/bhiksha.cc
index cdeafb47..870a4eee 100644
--- a/klm/lm/bhiksha.cc
+++ b/klm/lm/bhiksha.cc
@@ -1,6 +1,7 @@
 #include "lm/bhiksha.hh"
 #include "lm/config.hh"
 #include "util/file.hh"
+#include "util/exception.hh"
 
 #include <limits>
 
diff --git a/klm/lm/bhiksha.hh b/klm/lm/bhiksha.hh
index 5182ee2e..9734f3ab 100644
--- a/klm/lm/bhiksha.hh
+++ b/klm/lm/bhiksha.hh
@@ -23,7 +23,7 @@
 
 namespace lm {
 namespace ngram {
-class Config;
+struct Config;
 
 namespace trie {
 
diff --git a/klm/lm/build_binary.cc b/klm/lm/build_binary.cc
index c4a01cb4..49901c9e 100644
--- a/klm/lm/build_binary.cc
+++ b/klm/lm/build_binary.cc
@@ -25,7 +25,11 @@ void Usage(const char *name) {
 "-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
 "-w mmap|after determines how writing is done.\n"
 "   mmap maps the binary file and writes to it.  Default for trie.\n"
-"   after allocates anonymous memory, builds, and writes.  Default for probing.\n\n"
+"   after allocates anonymous memory, builds, and writes.  Default for probing.\n"
+"-r \"order1.arpa order2 order3 order4\" adds lower-order rest costs from these\n"
+"   model files.  order1.arpa must be an ARPA file.  All others may be ARPA or\n"
+"   the same data structure as being built.  All files must have the same\n"
+"   vocabulary.  For probing, the unigrams must be in the same order.\n\n"
 "type is either probing or trie.  Default is probing.\n\n"
 "probing uses a probing hash table.  It is the fastest but uses the most memory.\n"
 "-p sets the space multiplier and must be >1.0.  The default is 1.5.\n\n"
@@ -111,7 +115,7 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
   for (long int i = 0; i < length - 2; ++i) std::cout << ' ';
   std::cout << prefix << "B\n"
     "probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n"
-    "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r -p " << config.probing_multiplier << "\n"
+    "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r models -p " << config.probing_multiplier << "\n"
     "trie    " << std::setw(length) << (sizes[2] / divide) << " without quantization\n"
     "trie    " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n"
     "trie    " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n"
diff --git a/klm/lm/left.hh b/klm/lm/left.hh
index c00af88a..8c27232e 100644
--- a/klm/lm/left.hh
+++ b/klm/lm/left.hh
@@ -111,7 +111,7 @@ template <class M> class RuleScore {
         return;
       }
 
-      float backoffs[kMaxOrder - 1], backoffs2[kMaxOrder - 1];
+      float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1];
       float *back = backoffs, *back2 = backoffs2;
       unsigned char next_use = out_.right.length;
 
diff --git a/klm/lm/max_order.cc b/klm/lm/max_order.cc
new file mode 100644
index 00000000..94221201
--- /dev/null
+++ b/klm/lm/max_order.cc
@@ -0,0 +1,6 @@
+#include "lm/max_order.hh"
+#include <iostream>
+
+int main() {  // Takes no arguments: only reports the n-gram order limit compiled into this build.
+  std::cerr << "KenLM was compiled with a maximum supported n-gram order set to " << KENLM_MAX_ORDER << "." << std::endl;
+}
diff --git a/klm/lm/max_order.hh b/klm/lm/max_order.hh
index aff9de27..bc8687cd 100644
--- a/klm/lm/max_order.hh
+++ b/klm/lm/max_order.hh
@@ -1,14 +1,12 @@
-#ifndef LM_MAX_ORDER__
-#define LM_MAX_ORDER__
-namespace lm {
-namespace ngram {
-// If you need higher order, change this and recompile.  
-// Having this limit means that State can be
-// (kMaxOrder - 1) * sizeof(float) bytes instead of
-// sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
-const unsigned char kMaxOrder = 5;
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_MAX_ORDER__
+/* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM.
+ * If not, this is the default maximum order.  
+ * Having this limit means that State can be
+ * (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
+ * sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
+ */
+#ifndef KENLM_MAX_ORDER
+#define KENLM_MAX_ORDER 6
+#endif
+#ifndef KENLM_ORDER_MESSAGE
+#define KENLM_ORDER_MESSAGE "Edit klm/lm/max_order.hh."
+#endif
diff --git a/klm/lm/model.cc b/klm/lm/model.cc
index a2d31ce0..b46333a4 100644
--- a/klm/lm/model.cc
+++ b/klm/lm/model.cc
@@ -5,6 +5,7 @@
 #include "lm/search_hashed.hh"
 #include "lm/search_trie.hh"
 #include "lm/read_arpa.hh"
+#include "util/have.hh"
 #include "util/murmur_hash.hh"
 
 #include <algorithm>
@@ -47,7 +48,14 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge
   P::Init(begin_sentence, null_context, vocab_, search_.Order());
 }
 
+namespace {  // Unnamed namespace: the helper below is private to this translation unit.
+void CheckMaxOrder(size_t order) {  // Throws FormatLoadException if order exceeds the compile-time KENLM_MAX_ORDER; the message ends with the KENLM_ORDER_MESSAGE hint.
+  UTIL_THROW_IF(order > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << order << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ".  " << KENLM_ORDER_MESSAGE);
+}
+} // namespace
+
 template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd) {
+  CheckMaxOrder(params.counts.size());
   SetupMemory(start, params.counts, config);
   vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab);
   search_.LoadedBinary();
@@ -60,8 +68,7 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
     std::vector<uint64_t> counts;
     // File counts do not include pruned trigrams that extend to quadgrams etc.   These will be fixed by search_.
     ReadARPACounts(f, counts);
-
-    if (counts.size() > kMaxOrder) UTIL_THROW(FormatLoadException, "This model has order " << counts.size() << ".  Edit lm/max_order.hh, set kMaxOrder to at least this value, and recompile.");
+    CheckMaxOrder(counts.size());
     if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model.");
     if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0");
 
diff --git a/klm/lm/model.hh b/klm/lm/model.hh
index be872178..6dee9419 100644
--- a/klm/lm/model.hh
+++ b/klm/lm/model.hh
@@ -5,7 +5,6 @@
 #include "lm/binary_format.hh"
 #include "lm/config.hh"
 #include "lm/facade.hh"
-#include "lm/max_order.hh"
 #include "lm/quantize.hh"
 #include "lm/search_hashed.hh"
 #include "lm/search_trie.hh"
diff --git a/klm/lm/quantize.hh b/klm/lm/quantize.hh
index 3e9153e3..abed0112 100644
--- a/klm/lm/quantize.hh
+++ b/klm/lm/quantize.hh
@@ -17,7 +17,7 @@
 namespace lm {
 namespace ngram {
 
-class Config;
+struct Config;
 
 /* Store values directly and don't quantize. */
 class DontQuantize {
@@ -217,7 +217,7 @@ class SeparatelyQuantize {
     const Bins &LongestTable() const { return longest_; }
 
   private:
-    Bins tables_[kMaxOrder - 1][2];
+    Bins tables_[KENLM_MAX_ORDER - 1][2];
 
     Bins longest_;
 
diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc
index 2d9a337d..70727e4c 100644
--- a/klm/lm/read_arpa.cc
+++ b/klm/lm/read_arpa.cc
@@ -7,9 +7,14 @@
 #include <vector>
 
 #include <ctype.h>
+#include <math.h>
 #include <string.h>
 #include <stdint.h>
 
+#ifdef WIN32
+#include <float.h>
+#endif
+
 namespace lm {
 
 // 1 for '\t', '\n', and ' '.  This is stricter than isspace.  
@@ -93,7 +98,16 @@ void ReadBackoff(util::FilePiece &in, float &backoff) {
     case '\t':
       backoff = in.ReadFloat();
       if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff;
-      if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
+      {
+#ifdef WIN32
+		int float_class = _fpclass(backoff);
+        UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
+#else
+        int float_class = fpclassify(backoff);
+        UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
+#endif
+      }
+      UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
       break;
     case '\n':
       backoff = ngram::kNoExtensionBackoff;
diff --git a/klm/lm/search_trie.cc b/klm/lm/search_trie.cc
index 18e80d5a..832cc9f7 100644
--- a/klm/lm/search_trie.cc
+++ b/klm/lm/search_trie.cc
@@ -180,7 +180,7 @@ const float kBadProb = std::numeric_limits<float>::infinity();
 class SRISucks {
   public:
     SRISucks() {
-      for (BackoffMessages *i = messages_; i != messages_ + kMaxOrder - 1; ++i)
+      for (BackoffMessages *i = messages_; i != messages_ + KENLM_MAX_ORDER - 1; ++i)
         i->Init(sizeof(ProbPointer) + sizeof(WordIndex) * (i - messages_ + 1));
     }
 
@@ -196,7 +196,7 @@ class SRISucks {
     }
 
     void ObtainBackoffs(unsigned char total_order, FILE *unigram_file, RecordReader *reader) {
-      for (unsigned char i = 0; i < kMaxOrder - 1; ++i) {
+      for (unsigned char i = 0; i < KENLM_MAX_ORDER - 1; ++i) {
         it_[i] = values_[i].empty() ? NULL : &*values_[i].begin();
       }
       messages_[0].Apply(it_, unigram_file);
@@ -221,10 +221,10 @@ class SRISucks {
 
   private:
     // This used to be one array.  Then I needed to separate it by order for quantization to work.  
-    std::vector<float> values_[kMaxOrder - 1];
-    BackoffMessages messages_[kMaxOrder - 1];
+    std::vector<float> values_[KENLM_MAX_ORDER - 1];
+    BackoffMessages messages_[KENLM_MAX_ORDER - 1];
 
-    float *it_[kMaxOrder - 1];
+    float *it_[KENLM_MAX_ORDER - 1];
 };
 
 class FindBlanks {
@@ -337,7 +337,7 @@ struct Gram {
 template <class Doing> class BlankManager {
   public:
     BlankManager(unsigned char total_order, Doing &doing) : total_order_(total_order), been_length_(0), doing_(doing) {
-      for (float *i = basis_; i != basis_ + kMaxOrder - 1; ++i) *i = kBadProb;
+      for (float *i = basis_; i != basis_ + KENLM_MAX_ORDER - 1; ++i) *i = kBadProb;
     }
 
     void Visit(const WordIndex *to, unsigned char length, float prob) {
@@ -373,10 +373,10 @@ template <class Doing> class BlankManager {
   private:
     const unsigned char total_order_;
 
-    WordIndex been_[kMaxOrder];
+    WordIndex been_[KENLM_MAX_ORDER];
     unsigned char been_length_;
 
-    float basis_[kMaxOrder];
+    float basis_[KENLM_MAX_ORDER];
     
     Doing &doing_;
 };
@@ -470,8 +470,8 @@ void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, RecordReader &c
 } // namespace
 
 template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing) {
-  RecordReader inputs[kMaxOrder - 1];
-  RecordReader contexts[kMaxOrder - 1];
+  RecordReader inputs[KENLM_MAX_ORDER - 1];
+  RecordReader contexts[KENLM_MAX_ORDER - 1];
 
   for (unsigned char i = 2; i <= counts.size(); ++i) {
     inputs[i-2].Init(files.Full(i), i * sizeof(WordIndex) + (i == counts.size() ? sizeof(Prob) : sizeof(ProbBackoff)));
diff --git a/klm/lm/state.hh b/klm/lm/state.hh
index c7438414..830e40aa 100644
--- a/klm/lm/state.hh
+++ b/klm/lm/state.hh
@@ -32,7 +32,7 @@ class State {
 
     // Call this before using raw memcmp.  
     void ZeroRemaining() {
-      for (unsigned char i = length; i < kMaxOrder - 1; ++i) {
+      for (unsigned char i = length; i < KENLM_MAX_ORDER - 1; ++i) {
         words[i] = 0;
         backoff[i] = 0.0;
       }
@@ -42,8 +42,8 @@ class State {
 
     // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.  
     // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.  
-    WordIndex words[kMaxOrder - 1];
-    float backoff[kMaxOrder - 1];
+    WordIndex words[KENLM_MAX_ORDER - 1];
+    float backoff[KENLM_MAX_ORDER - 1];
     unsigned char length;
 };
 
@@ -72,11 +72,11 @@ struct Left {
   }
 
   void ZeroRemaining() {
-    for (uint64_t * i = pointers + length; i < pointers + kMaxOrder - 1; ++i)
+    for (uint64_t * i = pointers + length; i < pointers + KENLM_MAX_ORDER - 1; ++i)
       *i = 0;
   }
 
-  uint64_t pointers[kMaxOrder - 1];
+  uint64_t pointers[KENLM_MAX_ORDER - 1];
   unsigned char length;
   bool full;
 };
diff --git a/klm/lm/trie.hh b/klm/lm/trie.hh
index eff93292..034a1414 100644
--- a/klm/lm/trie.hh
+++ b/klm/lm/trie.hh
@@ -11,7 +11,7 @@
 
 namespace lm {
 namespace ngram {
-class Config;
+struct Config;
 namespace trie {
 
 struct NodeRange {
diff --git a/klm/lm/trie_sort.cc b/klm/lm/trie_sort.cc
index b80fed02..0d83221e 100644
--- a/klm/lm/trie_sort.cc
+++ b/klm/lm/trie_sort.cc
@@ -148,13 +148,17 @@ template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_f
 } // namespace
 
 void RecordReader::Init(FILE *file, std::size_t entry_size) {
-  rewind(file);
-  file_ = file;
+  entry_size_ = entry_size;
   data_.reset(malloc(entry_size));
   UTIL_THROW_IF(!data_.get(), util::ErrnoException, "Failed to malloc read buffer");
-  remains_ = true;
-  entry_size_ = entry_size;
-  ++*this;
+  file_ = file;
+  if (file) {
+    rewind(file);
+    remains_ = true;
+    ++*this;
+  } else {
+    remains_ = false;
+  }
 }
 
 void RecordReader::Overwrite(const void *start, std::size_t amount) {
@@ -169,9 +173,13 @@ void RecordReader::Overwrite(const void *start, std::size_t amount) {
 }
 
 void RecordReader::Rewind() {
-  rewind(file_);
-  remains_ = true;
-  ++*this;
+  if (file_) {
+    rewind(file_);
+    remains_ = true;
+    ++*this;
+  } else {
+    remains_ = false;
+  }
 }
 
 SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab) {
diff --git a/klm/lm/trie_sort.hh b/klm/lm/trie_sort.hh
index 3036319d..1e6fce51 100644
--- a/klm/lm/trie_sort.hh
+++ b/klm/lm/trie_sort.hh
@@ -25,7 +25,7 @@ namespace lm {
 class PositiveProbWarn;
 namespace ngram {
 class SortedVocabulary;
-class Config;
+struct Config;
 
 namespace trie {
 
@@ -107,7 +107,7 @@ class SortedFiles {
     
     util::scoped_fd unigram_;
 
-    util::scoped_FILE full_[kMaxOrder - 1], context_[kMaxOrder - 1];
+    util::scoped_FILE full_[KENLM_MAX_ORDER - 1], context_[KENLM_MAX_ORDER - 1];
 };
 
 } // namespace trie
diff --git a/klm/lm/value.hh b/klm/lm/value.hh
index 85e53f14..ba716713 100644
--- a/klm/lm/value.hh
+++ b/klm/lm/value.hh
@@ -6,7 +6,7 @@
 #include "lm/weights.hh"
 #include "util/bit_packing.hh"
 
-#include <inttypes.h>
+#include <stdint.h>
 
 namespace lm {
 namespace ngram {
diff --git a/klm/lm/value_build.hh b/klm/lm/value_build.hh
index 687a41a0..461e6a5c 100644
--- a/klm/lm/value_build.hh
+++ b/klm/lm/value_build.hh
@@ -10,9 +10,9 @@
 namespace lm {
 namespace ngram {
 
-class Config;
-class BackoffValue;
-class RestValue;
+struct Config;
+struct BackoffValue;
+struct RestValue;
 
 class NoRestBuild {
   public:
diff --git a/klm/lm/vocab.hh b/klm/lm/vocab.hh
index c3efcb4a..a25432f9 100644
--- a/klm/lm/vocab.hh
+++ b/klm/lm/vocab.hh
@@ -13,11 +13,11 @@
 #include <vector>
 
 namespace lm {
-class ProbBackoff;
+struct ProbBackoff;
 class EnumerateVocab;
 
 namespace ngram {
-class Config;
+struct Config;
 
 namespace detail {
 uint64_t HashForVocab(const char *str, std::size_t len);
diff --git a/klm/util/file.cc b/klm/util/file.cc
index 6a3885a7..98f13983 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -44,6 +44,16 @@ int OpenReadOrThrow(const char *name) {
   return ret;
 }
 
+int CreateOrThrow(const char *name) {  // Creates name (truncating it if it exists), opens it read/write and returns the fd; throws ErrnoException on failure.
+  int ret;
+#if defined(_WIN32) || defined(_WIN64)
+  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
+#else  // Non-Windows: requested permissions are rw for owner, read-only for group and others.
+  UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
+#endif
+  return ret;
+}
+
 uint64_t SizeFile(int fd) {
 #if defined(_WIN32) || defined(_WIN64)
   __int64 ret = _filelengthi64(fd);
diff --git a/klm/util/file.hh b/klm/util/file.hh
index 5c57e2a9..8af1ff4f 100644
--- a/klm/util/file.hh
+++ b/klm/util/file.hh
@@ -65,7 +65,10 @@ class scoped_FILE {
     std::FILE *file_;
 };
 
+// Open for read only.  
 int OpenReadOrThrow(const char *name);
+// Create file if it doesn't exist, truncate if it does.  Opened for reading and writing.
+int CreateOrThrow(const char *name);
 
 // Return value for SizeFile when it can't size properly.  
 const uint64_t kBadSize = (uint64_t)-1;
@@ -91,10 +94,13 @@ class TempMaker {
   public:
     explicit TempMaker(const std::string &prefix);
 
+    // These will already be unlinked for you.  
     int Make() const;
-
     std::FILE *MakeFile() const;
 
+    // This will force you to close the fd instead of leaving it open.  
+    std::string Name(scoped_fd &opened) const;
+
   private:
     std::string base_;
 };
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index a205995a..19a68728 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -27,7 +27,7 @@ ParseNumberException::ParseNumberException(StringPiece value) throw() {
 #ifdef HAVE_ZLIB
 GZException::GZException(gzFile file) {
   int num;
-  *this << gzerror( file, &num) << " from zlib";
+  *this << gzerror(file, &num) << " from zlib";
 }
 #endif // HAVE_ZLIB
 
diff --git a/klm/util/have.hh b/klm/util/have.hh
index b8181e99..1d76a7fc 100644
--- a/klm/util/have.hh
+++ b/klm/util/have.hh
@@ -13,7 +13,7 @@
 #endif
 
 #ifndef HAVE_BOOST
-#define HAVE_BOOST
+//#define HAVE_BOOST
 #endif
 
 #ifndef HAVE_THREADS
diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc
index 576fd4cc..bc9e3f81 100644
--- a/klm/util/mmap.cc
+++ b/klm/util/mmap.cc
@@ -19,8 +19,8 @@
 #include <windows.h>
 #include <io.h>
 #else
-#include <unistd.h>
 #include <sys/mman.h>
+#include <unistd.h>
 #endif
 
 namespace util {
@@ -171,20 +171,6 @@ void *MapZeroedWrite(int fd, std::size_t size) {
   return MapOrThrow(size, true, kFileFlags, false, fd, 0);
 }
 
-namespace {
-
-int CreateOrThrow(const char *name) {
-  int ret;
-#if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name);
-#else
-  UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name);
-#endif
-  return ret;
-}
-
-} // namespace
-
 void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
   file.reset(CreateOrThrow(name));
   try {
diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh
index 5de053aa..be6a643d 100644
--- a/klm/util/string_piece.hh
+++ b/klm/util/string_piece.hh
@@ -85,6 +85,11 @@ U_NAMESPACE_BEGIN
 #include <string>
 #include <string.h>
 
+#ifdef WIN32
+#undef max
+#undef min
+#endif
+
 class StringPiece {
  public:
   typedef size_t size_type;
-- 
cgit v1.2.3