diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-10 19:02:40 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-10 19:02:40 +0100 |
commit | afd7da3b2338661657ad0c4e9eec681e014d37bf (patch) | |
tree | 6e1a274389f0b8a7931f14a33d5ef778e86d90ec /klm/lm/read_arpa.cc | |
parent | f7175481a7ea9b8733bc023016112141e29fd827 (diff) |
KenLM 8307220
Diffstat (limited to 'klm/lm/read_arpa.cc')
-rw-r--r-- | klm/lm/read_arpa.cc | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/klm/lm/read_arpa.cc b/klm/lm/read_arpa.cc index 70727e4c..b709fef9 100644 --- a/klm/lm/read_arpa.cc +++ b/klm/lm/read_arpa.cc @@ -2,12 +2,13 @@ #include "lm/blank.hh" +#include <cmath> #include <cstdlib> #include <iostream> +#include <sstream> #include <vector> #include <ctype.h> -#include <math.h> #include <string.h> #include <stdint.h> @@ -31,6 +32,15 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) { const char kBinaryMagic[] = "mmap lm http://kheafield.com/code"; +// strtoull isn't portable enough :-( +uint64_t ReadCount(const std::string &from) { + std::stringstream stream(from); + uint64_t ret; + stream >> ret; + UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from); + return ret; +} + } // namespace void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) { @@ -52,15 +62,11 @@ void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) { // So strtol doesn't go off the end of line. std::string remaining(line.data() + 6, line.size() - 6); char *end_ptr; - unsigned long int length = std::strtol(remaining.c_str(), &end_ptr, 10); + unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10); if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line); if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line); ++end_ptr; - const char *start = end_ptr; - long int count = std::strtol(start, &end_ptr, 10); - if (count < 0) UTIL_THROW(FormatLoadException, "Negative n-gram count " << count); - if (start == end_ptr) UTIL_THROW(FormatLoadException, "Couldn't parse n-gram count from " << line); - number.push_back(count); + number.push_back(ReadCount(end_ptr)); } } @@ -103,7 +109,7 @@ void ReadBackoff(util::FilePiece &in, float &backoff) { int float_class = _fpclass(backoff); UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff); #else - int float_class = fpclassify(backoff); + int float_class = std::fpclassify(backoff); UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff); #endif } |