From 8882e9ebe158aef382bb5544559ef7f2a553db62 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 11 Sep 2012 14:23:39 +0100 Subject: Update kenlm and build system --- klm/util/Jamfile | 14 +++++++------- klm/util/ersatz_progress.cc | 10 +++++----- klm/util/ersatz_progress.hh | 10 ++++++---- klm/util/exception.cc | 3 +++ klm/util/exception.hh | 22 ++++++++++++++++++++++ klm/util/file.cc | 7 ++++++- klm/util/file_piece.cc | 1 - klm/util/probing_hash_table.hh | 5 +++-- 8 files changed, 52 insertions(+), 20 deletions(-) (limited to 'klm/util') diff --git a/klm/util/Jamfile b/klm/util/Jamfile index 3ee2c2c2..a939265f 100644 --- a/klm/util/Jamfile +++ b/klm/util/Jamfile @@ -1,10 +1,10 @@ -lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : .. : : .. ; +lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc /top//z : .. : : .. ; import testing ; -unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ; -run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ; -unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ; -unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ; +unit-test bit_packing_test : bit_packing_test.cc kenutil /top//boost_unit_test_framework ; +run file_piece_test.cc kenutil /top//boost_unit_test_framework : : file_piece.cc ; +unit-test joint_sort_test : joint_sort_test.cc kenutil /top//boost_unit_test_framework ; +unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil /top//boost_unit_test_framework ; +unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil /top//boost_unit_test_framework ; +unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil /top//boost_unit_test_framework ; diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc index 07b14e26..eb635ad8 100644 --- a/klm/util/ersatz_progress.cc +++ b/klm/util/ersatz_progress.cc @@ -9,16 +9,16 @@ namespace util { namespace { const unsigned char kWidth = 100; } -ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} +ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} ErsatzProgress::~ErsatzProgress() { if (out_) Finished(); } -ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message) +ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message) : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) { if (!out_) { - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); return; } if (!message.empty()) *out_ << message << '\n'; @@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std void ErsatzProgress::Milestone() { if (!out_) { current_ = 0; return; } if (!complete_) return; - unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); + unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); for (; stones_written_ < stone; ++stones_written_) { (*out_) << '*'; } if (stone == kWidth) { (*out_) << std::endl; - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); out_ = NULL; } else { next_ = std::max(next_, (stone * complete_) / kWidth); diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh index f709dc51..ff4d590f 100644 --- a/klm/util/ersatz_progress.hh +++ b/klm/util/ersatz_progress.hh @@ -4,6 +4,8 @@ #include #include +#include + // Ersatz version of boost::progress so core language model doesn't depend on // boost. Also adds option to print nothing. @@ -14,7 +16,7 @@ class ErsatzProgress { ErsatzProgress(); // Null means no output. The null value is useful for passing along the ostream pointer from another caller. - explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); + explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); ~ErsatzProgress(); @@ -23,12 +25,12 @@ class ErsatzProgress { return *this; } - ErsatzProgress &operator+=(std::size_t amount) { + ErsatzProgress &operator+=(uint64_t amount) { if ((current_ += amount) >= next_) Milestone(); return *this; } - void Set(std::size_t to) { + void Set(uint64_t to) { if ((current_ = to) >= next_) Milestone(); Milestone(); } @@ -40,7 +42,7 @@ class ErsatzProgress { private: void Milestone(); - std::size_t current_, next_, complete_; + uint64_t current_, next_, complete_; unsigned char stones_written_; std::ostream *out_; diff --git a/klm/util/exception.cc b/klm/util/exception.cc index c4f8c04c..3806e6de 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() { } EndOfFileException::~EndOfFileException() throw() {} +OverflowException::OverflowException() throw() {} +OverflowException::~OverflowException() throw() {} + } // namespace util diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 6d6a37cb..83f99cd6 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -2,9 +2,12 @@ #define UTIL_EXCEPTION__ #include +#include #include #include +#include + namespace util { template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); @@ -111,6 +114,25 @@ class EndOfFileException : public Exception { ~EndOfFileException() throw(); }; +class OverflowException : public Exception { + public: + OverflowException() throw(); + ~OverflowException() throw(); +}; + +template inline std::size_t CheckOverflowInternal(uint64_t value) { + UTIL_THROW_IF(value > static_cast(std::numeric_limits::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code."); + return value; +} + +template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) { + return value; +} + +inline std::size_t CheckOverflow(uint64_t value) { + return CheckOverflowInternal(value); +} + } // namespace util #endif // UTIL_EXCEPTION__ diff --git a/klm/util/file.cc b/klm/util/file.cc index 98f13983..ff5e64c9 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -119,8 +119,13 @@ void FSyncOrThrow(int fd) { } namespace { -void InternalSeek(int fd, off_t off, int whence) { +void InternalSeek(int fd, int64_t off, int whence) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed"); + +#else UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed"); +#endif } } // namespace diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index af341d6d..19a68728 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 3354b68e..770faa7e 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -8,6 +8,7 @@ #include #include +#include namespace util { @@ -42,8 +43,8 @@ template (multiplier * static_cast(entries))); + static uint64_t Size(uint64_t entries, float multiplier) { + uint64_t buckets = std::max(entries + 1, static_cast(multiplier * static_cast(entries))); return buckets * sizeof(Entry); } -- cgit v1.2.3