summaryrefslogtreecommitdiff
path: root/klm/lm/read_arpa.cc
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-11-05 15:29:46 +0100
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-11-05 15:29:46 +0100
commit1db70a45d59946560fbd5db6487b55a8674ef973 (patch)
tree172585dafe4d1462f22d8200e733d52dddb55b1e /klm/lm/read_arpa.cc
parent4dd5216d3afa9ab72b150e250a3c30a5f223ce53 (diff)
parent6bbf03ac46bd57400aa9e65a321a304a234af935 (diff)
merge upstream/master
Diffstat (limited to 'klm/lm/read_arpa.cc')
-rw-r--r--klm/lm/read_arpa.cc34
1 files changed, 27 insertions, 7 deletions
diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc
index 2d9a337d..b709fef9 100644
--- a/klm/lm/read_arpa.cc
+++ b/klm/lm/read_arpa.cc
@@ -2,14 +2,20 @@
#include "lm/blank.hh"
+#include <cmath>
#include <cstdlib>
#include <iostream>
+#include <sstream>
#include <vector>
#include <ctype.h>
#include <string.h>
#include <stdint.h>
+#ifdef WIN32
+#include <float.h>
+#endif
+
namespace lm {
// 1 for '\t', '\n', and ' '. This is stricter than isspace.
@@ -26,6 +32,15 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) {
const char kBinaryMagic[] = "mmap lm http://kheafield.com/code";
+// strtoull isn't portable enough :-(
+uint64_t ReadCount(const std::string &from) {
+ std::stringstream stream(from);
+ uint64_t ret;
+ stream >> ret;
+ UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from);
+ return ret;
+}
+
} // namespace
void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
@@ -47,15 +62,11 @@ void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
// So strtol doesn't go off the end of line.
std::string remaining(line.data() + 6, line.size() - 6);
char *end_ptr;
- unsigned long int length = std::strtol(remaining.c_str(), &end_ptr, 10);
+ unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10);
if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line);
if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line);
++end_ptr;
- const char *start = end_ptr;
- long int count = std::strtol(start, &end_ptr, 10);
- if (count < 0) UTIL_THROW(FormatLoadException, "Negative n-gram count " << count);
- if (start == end_ptr) UTIL_THROW(FormatLoadException, "Couldn't parse n-gram count from " << line);
- number.push_back(count);
+ number.push_back(ReadCount(end_ptr));
}
}
@@ -93,7 +104,16 @@ void ReadBackoff(util::FilePiece &in, float &backoff) {
case '\t':
backoff = in.ReadFloat();
if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff;
- if ((in.get() != '\n')) UTIL_THROW(FormatLoadException, "Expected newline after backoff");
+ {
+#ifdef WIN32
+ int float_class = _fpclass(backoff);
+ UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
+#else
+ int float_class = std::fpclassify(backoff);
+ UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
+#endif
+ }
+ UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
break;
case '\n':
backoff = ngram::kNoExtensionBackoff;