From a36fcc5d55c1de84ae68c1091ebff2b1c32dc3b7 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 10 Oct 2012 19:02:40 +0100 Subject: KenLM 8307220 --- klm/util/ersatz_progress.cc | 10 +++++----- klm/util/ersatz_progress.hh | 10 ++++++---- klm/util/exception.cc | 3 +++ klm/util/exception.hh | 22 ++++++++++++++++++++++ klm/util/file.cc | 34 +++++++++++++++++++++++++++------- klm/util/file.hh | 6 +++--- klm/util/file_piece.cc | 3 ++- klm/util/probing_hash_table.hh | 5 +++-- 8 files changed, 71 insertions(+), 22 deletions(-) (limited to 'klm/util') diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc index 07b14e26..eb635ad8 100644 --- a/klm/util/ersatz_progress.cc +++ b/klm/util/ersatz_progress.cc @@ -9,16 +9,16 @@ namespace util { namespace { const unsigned char kWidth = 100; } -ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} +ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits::max()), complete_(next_), out_(NULL) {} ErsatzProgress::~ErsatzProgress() { if (out_) Finished(); } -ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message) +ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message) : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) { if (!out_) { - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); return; } if (!message.empty()) *out_ << message << '\n'; @@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std void ErsatzProgress::Milestone() { if (!out_) { current_ = 0; return; } if (!complete_) return; - unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); + unsigned char stone = std::min(static_cast(kWidth), (current_ * kWidth) / complete_); for (; stones_written_ < stone; ++stones_written_) { (*out_) << '*'; } if (stone == kWidth) { (*out_) << std::endl; - next_ = std::numeric_limits::max(); + next_ = std::numeric_limits::max(); out_ = NULL; } else { next_ = std::max(next_, (stone * complete_) / kWidth); diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh index f709dc51..ff4d590f 100644 --- a/klm/util/ersatz_progress.hh +++ b/klm/util/ersatz_progress.hh @@ -4,6 +4,8 @@ #include #include +#include + // Ersatz version of boost::progress so core language model doesn't depend on // boost. Also adds option to print nothing. @@ -14,7 +16,7 @@ class ErsatzProgress { ErsatzProgress(); // Null means no output. The null value is useful for passing along the ostream pointer from another caller. - explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); + explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = ""); ~ErsatzProgress(); @@ -23,12 +25,12 @@ class ErsatzProgress { return *this; } - ErsatzProgress &operator+=(std::size_t amount) { + ErsatzProgress &operator+=(uint64_t amount) { if ((current_ += amount) >= next_) Milestone(); return *this; } - void Set(std::size_t to) { + void Set(uint64_t to) { if ((current_ = to) >= next_) Milestone(); Milestone(); } @@ -40,7 +42,7 @@ class ErsatzProgress { private: void Milestone(); - std::size_t current_, next_, complete_; + uint64_t current_, next_, complete_; unsigned char stones_written_; std::ostream *out_; diff --git a/klm/util/exception.cc b/klm/util/exception.cc index c4f8c04c..3806e6de 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() { } EndOfFileException::~EndOfFileException() throw() {} +OverflowException::OverflowException() throw() {} +OverflowException::~OverflowException() throw() {} + } // namespace util diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 6d6a37cb..83f99cd6 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -2,9 +2,12 @@ #define UTIL_EXCEPTION__ #include +#include #include #include +#include + namespace util { template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); @@ -111,6 +114,25 @@ class EndOfFileException : public Exception { ~EndOfFileException() throw(); }; +class OverflowException : public Exception { + public: + OverflowException() throw(); + ~OverflowException() throw(); +}; + +template inline std::size_t CheckOverflowInternal(uint64_t value) { + UTIL_THROW_IF(value > static_cast(std::numeric_limits::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code."); + return value; +} + +template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) { + return value; +} + +inline std::size_t CheckOverflow(uint64_t value) { + return CheckOverflowInternal(value); +} + } // namespace util #endif // UTIL_EXCEPTION__ diff --git a/klm/util/file.cc b/klm/util/file.cc index 98f13983..6bf879ac 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -111,6 +112,11 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) { } } +void WriteOrThrow(FILE *to, const void *data, std::size_t size) { + assert(size); + if (1 != std::fwrite(data, size, 1, to)) UTIL_THROW(util::ErrnoException, "Short write; requested size " << size); +} + void FSyncOrThrow(int fd) { // Apparently windows doesn't have fsync? #if !defined(_WIN32) && !defined(_WIN64) @@ -119,8 +125,13 @@ void FSyncOrThrow(int fd) { } namespace { -void InternalSeek(int fd, off_t off, int whence) { +void InternalSeek(int fd, int64_t off, int whence) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed"); + +#else UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed"); +#endif } } // namespace @@ -143,6 +154,12 @@ std::FILE *FDOpenOrThrow(scoped_fd &file) { return ret; } +std::FILE *FOpenOrThrow(const char *path, const char *mode) { + std::FILE *ret; + UTIL_THROW_IF(!(ret = fopen(path, mode)), util::ErrnoException, "Could not fopen " << path << " for " << mode); + return ret; +} + TempMaker::TempMaker(const std::string &prefix) : base_(prefix) { base_ += "XXXXXX"; } @@ -242,7 +259,9 @@ mkstemp_and_unlink(char *tmpl) /* Modified for windows and to unlink */ // fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE); - fd = _open (tmpl, _O_RDWR | _O_CREAT | _O_TEMPORARY | _O_EXCL | _O_BINARY, _S_IREAD | _S_IWRITE); + int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY; + flags |= _O_TEMPORARY; + fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE); if (fd >= 0) { errno = save_errno; @@ -260,17 +279,18 @@ mkstemp_and_unlink(char *tmpl) int mkstemp_and_unlink(char *tmpl) { int ret = mkstemp(tmpl); - if (ret == -1) return -1; - UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl); + if (ret != -1) { + UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl); + } return ret; } #endif int TempMaker::Make() const { - std::string copy(base_); - copy.push_back(0); + std::string name(base_); + name.push_back(0); int ret; - UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(©[0])), util::ErrnoException, "Failed to make a temporary based on " << base_); + UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), util::ErrnoException, "Failed to make a temporary based on " << base_); return ret; } diff --git a/klm/util/file.hh b/klm/util/file.hh index 8af1ff4f..185cb1f3 100644 --- a/klm/util/file.hh +++ b/klm/util/file.hh @@ -80,6 +80,7 @@ void ReadOrThrow(int fd, void *to, std::size_t size); std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount); void WriteOrThrow(int fd, const void *data_void, std::size_t size); +void WriteOrThrow(FILE *to, const void *data, std::size_t size); void FSyncOrThrow(int fd); @@ -90,6 +91,8 @@ void SeekEnd(int fd); std::FILE *FDOpenOrThrow(scoped_fd &file); +std::FILE *FOpenOrThrow(const char *path, const char *mode); + class TempMaker { public: explicit TempMaker(const std::string &prefix); @@ -98,9 +101,6 @@ class TempMaker { int Make() const; std::FILE *MakeFile() const; - // This will force you to close the fd instead of leaving it open. - std::string Name(scoped_fd &opened) const; - private: std::string base_; }; diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index af341d6d..280f438c 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -5,13 +5,14 @@ #include "util/mmap.hh" #ifdef WIN32 #include +#else +#include #endif // WIN32 #include #include #include -#include #include #include #include diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh index 3354b68e..770faa7e 100644 --- a/klm/util/probing_hash_table.hh +++ b/klm/util/probing_hash_table.hh @@ -8,6 +8,7 @@ #include #include +#include namespace util { @@ -42,8 +43,8 @@ template (multiplier * static_cast(entries))); + static uint64_t Size(uint64_t entries, float multiplier) { + uint64_t buckets = std::max(entries + 1, static_cast(multiplier * static_cast(entries))); return buckets * sizeof(Entry); } -- cgit v1.2.3