summaryrefslogtreecommitdiff
path: root/klm/util
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-10-11 14:06:32 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-10-11 14:06:32 -0400
commit07ea7b64b6f85e5798a8068453ed9fd2b97396db (patch)
tree644496a1690d84d82a396bbc1e39160788beb2cd /klm/util
parent37b9e45e5cb29d708f7249dbe0b0fb27685282a0 (diff)
parenta36fcc5d55c1de84ae68c1091ebff2b1c32dc3b7 (diff)
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'klm/util')
-rw-r--r--klm/util/Jamfile10
-rw-r--r--klm/util/ersatz_progress.cc10
-rw-r--r--klm/util/ersatz_progress.hh10
-rw-r--r--klm/util/exception.cc3
-rw-r--r--klm/util/exception.hh22
-rw-r--r--klm/util/file.cc34
-rw-r--r--klm/util/file.hh6
-rw-r--r--klm/util/file_piece.cc3
-rw-r--r--klm/util/probing_hash_table.hh5
9 files changed, 71 insertions, 32 deletions
diff --git a/klm/util/Jamfile b/klm/util/Jamfile
deleted file mode 100644
index 3ee2c2c2..00000000
--- a/klm/util/Jamfile
+++ /dev/null
@@ -1,10 +0,0 @@
-lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ../..//z : <include>.. : : <include>.. ;
-
-import testing ;
-
-unit-test bit_packing_test : bit_packing_test.cc kenutil ../..///boost_unit_test_framework ;
-run file_piece_test.cc kenutil ../..///boost_unit_test_framework : : file_piece.cc ;
-unit-test joint_sort_test : joint_sort_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test probing_hash_table_test : probing_hash_table_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test sorted_uniform_test : sorted_uniform_test.cc kenutil ../..///boost_unit_test_framework ;
-unit-test tokenize_piece_test : tokenize_piece_test.cc kenutil ../..///boost_unit_test_framework ;
diff --git a/klm/util/ersatz_progress.cc b/klm/util/ersatz_progress.cc
index 07b14e26..eb635ad8 100644
--- a/klm/util/ersatz_progress.cc
+++ b/klm/util/ersatz_progress.cc
@@ -9,16 +9,16 @@ namespace util {
namespace { const unsigned char kWidth = 100; }
-ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<std::size_t>::max()), complete_(next_), out_(NULL) {}
+ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
ErsatzProgress::~ErsatzProgress() {
if (out_) Finished();
}
-ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message)
+ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
: current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
if (!out_) {
- next_ = std::numeric_limits<std::size_t>::max();
+ next_ = std::numeric_limits<uint64_t>::max();
return;
}
if (!message.empty()) *out_ << message << '\n';
@@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std
void ErsatzProgress::Milestone() {
if (!out_) { current_ = 0; return; }
if (!complete_) return;
- unsigned char stone = std::min(static_cast<std::size_t>(kWidth), (current_ * kWidth) / complete_);
+ unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
for (; stones_written_ < stone; ++stones_written_) {
(*out_) << '*';
}
if (stone == kWidth) {
(*out_) << std::endl;
- next_ = std::numeric_limits<std::size_t>::max();
+ next_ = std::numeric_limits<uint64_t>::max();
out_ = NULL;
} else {
next_ = std::max(next_, (stone * complete_) / kWidth);
diff --git a/klm/util/ersatz_progress.hh b/klm/util/ersatz_progress.hh
index f709dc51..ff4d590f 100644
--- a/klm/util/ersatz_progress.hh
+++ b/klm/util/ersatz_progress.hh
@@ -4,6 +4,8 @@
#include <iostream>
#include <string>
+#include <inttypes.h>
+
// Ersatz version of boost::progress so core language model doesn't depend on
// boost. Also adds option to print nothing.
@@ -14,7 +16,7 @@ class ErsatzProgress {
ErsatzProgress();
// Null means no output. The null value is useful for passing along the ostream pointer from another caller.
- explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
+ explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
~ErsatzProgress();
@@ -23,12 +25,12 @@ class ErsatzProgress {
return *this;
}
- ErsatzProgress &operator+=(std::size_t amount) {
+ ErsatzProgress &operator+=(uint64_t amount) {
if ((current_ += amount) >= next_) Milestone();
return *this;
}
- void Set(std::size_t to) {
+ void Set(uint64_t to) {
if ((current_ = to) >= next_) Milestone();
Milestone();
}
@@ -40,7 +42,7 @@ class ErsatzProgress {
private:
void Milestone();
- std::size_t current_, next_, complete_;
+ uint64_t current_, next_, complete_;
unsigned char stones_written_;
std::ostream *out_;
diff --git a/klm/util/exception.cc b/klm/util/exception.cc
index c4f8c04c..3806e6de 100644
--- a/klm/util/exception.cc
+++ b/klm/util/exception.cc
@@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() {
}
EndOfFileException::~EndOfFileException() throw() {}
+OverflowException::OverflowException() throw() {}
+OverflowException::~OverflowException() throw() {}
+
} // namespace util
diff --git a/klm/util/exception.hh b/klm/util/exception.hh
index 6d6a37cb..83f99cd6 100644
--- a/klm/util/exception.hh
+++ b/klm/util/exception.hh
@@ -2,9 +2,12 @@
#define UTIL_EXCEPTION__
#include <exception>
+#include <limits>
#include <sstream>
#include <string>
+#include <inttypes.h>
+
namespace util {
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
@@ -111,6 +114,25 @@ class EndOfFileException : public Exception {
~EndOfFileException() throw();
};
+class OverflowException : public Exception {
+ public:
+ OverflowException() throw();
+ ~OverflowException() throw();
+};
+
+template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
+ UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
+ return value;
+}
+
+template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
+ return value;
+}
+
+inline std::size_t CheckOverflow(uint64_t value) {
+ return CheckOverflowInternal<sizeof(std::size_t)>(value);
+}
+
} // namespace util
#endif // UTIL_EXCEPTION__
diff --git a/klm/util/file.cc b/klm/util/file.cc
index 98f13983..6bf879ac 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -6,6 +6,7 @@
#include <cstdio>
#include <iostream>
+#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -111,6 +112,11 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
}
}
+void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
+ assert(size);
+ if (1 != std::fwrite(data, size, 1, to)) UTIL_THROW(util::ErrnoException, "Short write; requested size " << size);
+}
+
void FSyncOrThrow(int fd) {
// Apparently windows doesn't have fsync?
#if !defined(_WIN32) && !defined(_WIN64)
@@ -119,8 +125,13 @@ void FSyncOrThrow(int fd) {
}
namespace {
-void InternalSeek(int fd, off_t off, int whence) {
+void InternalSeek(int fd, int64_t off, int whence) {
+#if defined(_WIN32) || defined(_WIN64)
+ UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed");
+
+#else
UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed");
+#endif
}
} // namespace
@@ -143,6 +154,12 @@ std::FILE *FDOpenOrThrow(scoped_fd &file) {
return ret;
}
+std::FILE *FOpenOrThrow(const char *path, const char *mode) {
+ std::FILE *ret;
+ UTIL_THROW_IF(!(ret = fopen(path, mode)), util::ErrnoException, "Could not fopen " << path << " for " << mode);
+ return ret;
+}
+
TempMaker::TempMaker(const std::string &prefix) : base_(prefix) {
base_ += "XXXXXX";
}
@@ -242,7 +259,9 @@ mkstemp_and_unlink(char *tmpl)
/* Modified for windows and to unlink */
// fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
- fd = _open (tmpl, _O_RDWR | _O_CREAT | _O_TEMPORARY | _O_EXCL | _O_BINARY, _S_IREAD | _S_IWRITE);
+ int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
+ flags |= _O_TEMPORARY;
+ fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
if (fd >= 0)
{
errno = save_errno;
@@ -260,17 +279,18 @@ mkstemp_and_unlink(char *tmpl)
int
mkstemp_and_unlink(char *tmpl) {
int ret = mkstemp(tmpl);
- if (ret == -1) return -1;
- UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
+ if (ret != -1) {
+ UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
+ }
return ret;
}
#endif
int TempMaker::Make() const {
- std::string copy(base_);
- copy.push_back(0);
+ std::string name(base_);
+ name.push_back(0);
int ret;
- UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&copy[0])), util::ErrnoException, "Failed to make a temporary based on " << base_);
+ UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0])), util::ErrnoException, "Failed to make a temporary based on " << base_);
return ret;
}
diff --git a/klm/util/file.hh b/klm/util/file.hh
index 8af1ff4f..185cb1f3 100644
--- a/klm/util/file.hh
+++ b/klm/util/file.hh
@@ -80,6 +80,7 @@ void ReadOrThrow(int fd, void *to, std::size_t size);
std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount);
void WriteOrThrow(int fd, const void *data_void, std::size_t size);
+void WriteOrThrow(FILE *to, const void *data, std::size_t size);
void FSyncOrThrow(int fd);
@@ -90,6 +91,8 @@ void SeekEnd(int fd);
std::FILE *FDOpenOrThrow(scoped_fd &file);
+std::FILE *FOpenOrThrow(const char *path, const char *mode);
+
class TempMaker {
public:
explicit TempMaker(const std::string &prefix);
@@ -98,9 +101,6 @@ class TempMaker {
int Make() const;
std::FILE *MakeFile() const;
- // This will force you to close the fd instead of leaving it open.
- std::string Name(scoped_fd &opened) const;
-
private:
std::string base_;
};
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index af341d6d..280f438c 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -5,13 +5,14 @@
#include "util/mmap.hh"
#ifdef WIN32
#include <io.h>
+#else
+#include <unistd.h>
#endif // WIN32
#include <iostream>
#include <string>
#include <limits>
-#include <unistd.h>
#include <assert.h>
#include <ctype.h>
#include <fcntl.h>
diff --git a/klm/util/probing_hash_table.hh b/klm/util/probing_hash_table.hh
index 3354b68e..770faa7e 100644
--- a/klm/util/probing_hash_table.hh
+++ b/klm/util/probing_hash_table.hh
@@ -8,6 +8,7 @@
#include <functional>
#include <assert.h>
+#include <inttypes.h>
namespace util {
@@ -42,8 +43,8 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
typedef EqualT Equal;
public:
- static std::size_t Size(std::size_t entries, float multiplier) {
- std::size_t buckets = std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries)));
+ static uint64_t Size(uint64_t entries, float multiplier) {
+ uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries)));
return buckets * sizeof(Entry);
}