summaryrefslogtreecommitdiff
path: root/klm/util
diff options
context:
space:
mode:
Diffstat (limited to 'klm/util')
-rw-r--r--klm/util/Jamfile14
-rw-r--r--klm/util/ersatz_progress.cc10
-rw-r--r--klm/util/ersatz_progress.hh10
-rw-r--r--klm/util/exception.cc3
-rw-r--r--klm/util/exception.hh22
-rw-r--r--klm/util/file.cc7
-rw-r--r--klm/util/file_piece.cc1
-rw-r--r--klm/util/probing_hash_table.hh5
8 files changed, 52 insertions, 20 deletions
diff --git a/klm/util/Jamfile b/klm/util/Jamfile
index 3ee2c2c2..a939265f 100644
--- a/klm/util/Jamfile
+++ b/klm/util/Jamfile
@@ -1,10 +1,10 @@
-lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : <include>.. : : <include>.. ;
+lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc /top//z : <include>.. : : <include>.. ;
import testing ;
-unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ;
-run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ;
-unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ;
+unit-test bit_packing_test : bit_packing_test.cc kenutil /top//boost_unit_test_framework ;
+run file_piece_test.cc kenutil /top//boost_unit_test_framework : : file_piece.cc ;
+unit-test joint_sort_test : joint_sort_test.cc kenutil /top//boost_unit_test_framework ;
+unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil /top//boost_unit_test_framework ;
+unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil /top//boost_unit_test_framework ;
+unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil /top//boost_unit_test_framework ;
diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc
index 07b14e26..eb635ad8 100644
--- a/klm/util/ersatz_progress.cc
+++ b/klm/util/ersatz_progress.cc
@@ -9,16 +9,16 @@ namespace util {
namespace { const unsigned char kWidth = 100; }
-ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<std::size_t>::max()), complete_(next_), out_(NULL) {}
+ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
ErsatzProgress::~ErsatzProgress() {
if (out_) Finished();
}
-ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message)
+ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
: current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
if (!out_) {
- next_ = std::numeric_limits<std::size_t>::max();
+ next_ = std::numeric_limits<uint64_t>::max();
return;
}
if (!message.empty()) *out_ << message << '\n';
@@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std
void ErsatzProgress::Milestone() {
if (!out_) { current_ = 0; return; }
if (!complete_) return;
- unsigned char stone = std::min(static_cast<std::size_t>(kWidth), (current_ * kWidth) / complete_);
+ unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
for (; stones_written_ < stone; ++stones_written_) {
(*out_) << '*';
}
if (stone == kWidth) {
(*out_) << std::endl;
- next_ = std::numeric_limits<std::size_t>::max();
+ next_ = std::numeric_limits<uint64_t>::max();
out_ = NULL;
} else {
next_ = std::max(next_, (stone * complete_) / kWidth);
diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh
index f709dc51..ff4d590f 100644
--- a/klm/util/ersatz_progress.hh
+++ b/klm/util/ersatz_progress.hh
@@ -4,6 +4,8 @@
#include <iostream>
#include <string>
+#include <inttypes.h>
+
// Ersatz version of boost::progress so core language model doesn't depend on
// boost. Also adds option to print nothing.
@@ -14,7 +16,7 @@ class ErsatzProgress {
ErsatzProgress();
// Null means no output. The null value is useful for passing along the ostream pointer from another caller.
- explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
+ explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
~ErsatzProgress();
@@ -23,12 +25,12 @@ class ErsatzProgress {
return *this;
}
- ErsatzProgress &operator+=(std::size_t amount) {
+ ErsatzProgress &operator+=(uint64_t amount) {
if ((current_ += amount) >= next_) Milestone();
return *this;
}
- void Set(std::size_t to) {
+ void Set(uint64_t to) {
if ((current_ = to) >= next_) Milestone();
Milestone();
}
@@ -40,7 +42,7 @@ class ErsatzProgress {
private:
void Milestone();
- std::size_t current_, next_, complete_;
+ uint64_t current_, next_, complete_;
unsigned char stones_written_;
std::ostream *out_;
diff --git a/klm/util/exception.cc b/klm/util/exception.cc
index c4f8c04c..3806e6de 100644
--- a/klm/util/exception.cc
+++ b/klm/util/exception.cc
@@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() {
}
EndOfFileException::~EndOfFileException() throw() {}
+OverflowException::OverflowException() throw() {}
+OverflowException::~OverflowException() throw() {}
+
} // namespace util
diff --git a/klm/util/exception.hh b/klm/util/exception.hh
index 6d6a37cb..83f99cd6 100644
--- a/klm/util/exception.hh
+++ b/klm/util/exception.hh
@@ -2,9 +2,12 @@
#define UTIL_EXCEPTION__
#include <exception>
+#include <limits>
#include <sstream>
#include <string>
+#include <inttypes.h>
+
namespace util {
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
@@ -111,6 +114,25 @@ class EndOfFileException : public Exception {
~EndOfFileException() throw();
};
+class OverflowException : public Exception {
+ public:
+ OverflowException() throw();
+ ~OverflowException() throw();
+};
+
+template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
+ UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
+ return value;
+}
+
+template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
+ return value;
+}
+
+inline std::size_t CheckOverflow(uint64_t value) {
+ return CheckOverflowInternal<sizeof(std::size_t)>(value);
+}
+
} // namespace util
#endif // UTIL_EXCEPTION__
diff --git a/klm/util/file.cc b/klm/util/file.cc
index 98f13983..ff5e64c9 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -119,8 +119,13 @@ void FSyncOrThrow(int fd) {
}
namespace {
-void InternalSeek(int fd, off_t off, int whence) {
+void InternalSeek(int fd, int64_t off, int whence) {
+#if defined(_WIN32) || defined(_WIN64)
+ UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed");
+
+#else
UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed");
+#endif
}
} // namespace
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index af341d6d..19a68728 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -11,7 +11,6 @@
#include <string>
#include <limits>
-#include <unistd.h>
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 3354b68e..770faa7e 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -8,6 +8,7 @@
#include <functional>
#include <assert.h>
+#include <inttypes.h>
namespace util {
@@ -42,8 +43,8 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
typedef EqualT Equal;
public:
- static std::size_t Size(std::size_t entries, float multiplier) {
- std::size_t buckets = std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries)));
+ static uint64_t Size(uint64_t entries, float multiplier) {
+ uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries)));
return buckets * sizeof(Entry);
}