summaryrefslogtreecommitdiff
path: root/klm/util
diff options
context:
space:
mode:
authorKenneth Heafield <kenlm@kheafield.com>2011-09-21 18:23:50 -0400
committerKenneth Heafield <kenlm@kheafield.com>2011-09-21 18:23:50 -0400
commit83cae8bd92a2ebffcf2b8b4d2500766da008fe3d (patch)
tree575a54bbebc74eaafef6e9ebe4b37e6ad211b632 /klm/util
parent4b619c0ca5b8c521c6fb4c3df1c4b43756baa306 (diff)
Updated kenlm. Includes left state support but not the cdec-side use of it. Updated binary format.
Diffstat (limited to 'klm/util')
-rw-r--r--klm/util/bit_packing.hh14
-rw-r--r--klm/util/exception.cc5
-rw-r--r--klm/util/exception.hh6
-rw-r--r--klm/util/file.cc74
-rw-r--r--klm/util/file.hh74
-rw-r--r--klm/util/file_piece.cc18
-rw-r--r--klm/util/file_piece.hh14
-rw-r--r--klm/util/mmap.cc18
-rw-r--r--klm/util/mmap.hh4
-rw-r--r--klm/util/murmur_hash.cc258
-rw-r--r--klm/util/scoped.cc24
-rw-r--r--klm/util/scoped.hh58
-rw-r--r--klm/util/sized_iterator.hh107
-rw-r--r--klm/util/tokenize_piece.hh69
14 files changed, 504 insertions, 239 deletions
diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh
index 9f47d559..33266b94 100644
--- a/klm/util/bit_packing.hh
+++ b/klm/util/bit_packing.hh
@@ -86,6 +86,20 @@ inline void WriteFloat32(void *base, uint64_t bit_off, float value) {
const uint32_t kSignBit = 0x80000000;
+inline void SetSign(float &to) { // Turns on the sign bit (kSignBit) of `to` in place.
+ FloatEnc enc; // FloatEnc is declared outside this hunk; presumably a float/integer overlay with members f and i - TODO confirm.
+ enc.f = to;
+ enc.i |= kSignBit; // kSignBit is 0x80000000.
+ to = enc.f;
+}
+
+inline void UnsetSign(float &to) { // Clears the sign bit (kSignBit) of `to` in place.
+ FloatEnc enc; // FloatEnc is declared outside this hunk; presumably a float/integer overlay with members f and i - TODO confirm.
+ enc.f = to;
+ enc.i &= ~kSignBit; // Keeps every bit except 0x80000000.
+ to = enc.f;
+}
+
inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {
FloatEnc encoded;
encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31);
diff --git a/klm/util/exception.cc b/klm/util/exception.cc
index 62280970..96951495 100644
--- a/klm/util/exception.cc
+++ b/klm/util/exception.cc
@@ -79,4 +79,9 @@ ErrnoException::ErrnoException() throw() : errno_(errno) {
ErrnoException::~ErrnoException() throw() {}
+EndOfFileException::EndOfFileException() throw() { // Default constructor; declared not to throw.
+ *this << "End of file"; // Streams the fixed text into the exception; presumably this becomes its message - operator<< is defined outside this hunk.
+}
+EndOfFileException::~EndOfFileException() throw() {} // Empty destructor; declared not to throw.
+
} // namespace util
diff --git a/klm/util/exception.hh b/klm/util/exception.hh
index 81675a57..6d6a37cb 100644
--- a/klm/util/exception.hh
+++ b/klm/util/exception.hh
@@ -105,6 +105,12 @@ class ErrnoException : public Exception {
int errno_;
};
+class EndOfFileException : public Exception { // Exception type for end-of-file; its constructor (exception.cc) streams the text "End of file" into itself.
+ public:
+ EndOfFileException() throw(); // Takes no arguments; declared not to throw.
+ ~EndOfFileException() throw(); // Declared not to throw.
+};
+
} // namespace util
#endif // UTIL_EXCEPTION__
diff --git a/klm/util/file.cc b/klm/util/file.cc
new file mode 100644
index 00000000..d707568e
--- /dev/null
+++ b/klm/util/file.cc
@@ -0,0 +1,74 @@
+#include "util/file.hh"
+
+#include "util/exception.hh"
+
+#include <cstdlib>
+#include <cstdio>
+#include <iostream>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+namespace util {
+
+scoped_fd::~scoped_fd() { // Closes the held descriptor; a failing close() is treated as fatal.
+ if (fd_ != -1 && close(fd_)) { // close() is skipped when fd_ is -1; a nonzero return from close() enters the failure branch.
+ std::cerr << "Could not close file " << fd_ << std::endl;
+ std::abort(); // Terminates the process instead of throwing from the destructor.
+ }
+}
+
+scoped_FILE::~scoped_FILE() { // Closes the held stream; a failing fclose() is treated as fatal.
+ if (file_ && std::fclose(file_)) { // fclose() is skipped for a null pointer; a nonzero return enters the failure branch.
+ std::cerr << "Could not close file " << std::endl;
+ std::abort(); // Terminates the process instead of throwing from the destructor.
+ }
+}
+
+int OpenReadOrThrow(const char *name) { // Opens `name` with O_RDONLY and returns the resulting descriptor.
+ int ret;
+ UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name); // UTIL_THROW_IF is defined elsewhere; presumably throws ErrnoException carrying this text when open() returns -1.
+ return ret; // The caller receives the raw descriptor; nothing in this function closes it.
+}
+
+int CreateOrThrow(const char *name) { // Opens `name` for read/write, creating it if needed and truncating it, and returns the descriptor.
+ int ret;
+ UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR)), ErrnoException, "while creating " << name); // Mode requests owner read and write only. UTIL_THROW_IF is defined elsewhere; presumably throws ErrnoException when open() returns -1.
+ return ret; // The caller receives the raw descriptor; nothing in this function closes it.
+}
+
+off_t SizeFile(int fd) { // Returns st_size from fstat(fd), or kBadSize when the size cannot be trusted.
+ struct stat sb;
+ if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize; // kBadSize if fstat fails, or if the size is 0 and fd is not a regular file; an empty regular file still returns 0.
+ return sb.st_size;
+}
+
+void ReadOrThrow(int fd, void *to_void, std::size_t amount) { // Reads exactly `amount` bytes from fd into to_void, looping over short reads.
+ uint8_t *to = static_cast<uint8_t*>(to_void); // Byte pointer so the destination can be advanced by the number of bytes read.
+ while (amount) {
+ ssize_t ret = read(fd, to, amount);
+ if (ret == -1) UTIL_THROW(ErrnoException, "Reading " << amount << " from fd " << fd << " failed."); // NOTE(review): every -1 result throws, so an interrupted read() is not retried - confirm that is intended.
+ if (ret == 0) UTIL_THROW(Exception, "Hit EOF in fd " << fd << " but there should be " << amount << " more bytes to read."); // A 0 return before `amount` is exhausted is reported as a plain Exception.
+ amount -= ret; // `amount` is the remaining byte count, which is why the messages above print the remainder.
+ to += ret;
+ }
+}
+
+void WriteOrThrow(int fd, const void *data_void, std::size_t size) { // Writes exactly `size` bytes from data_void to fd, looping over short writes.
+ const uint8_t *data = static_cast<const uint8_t*>(data_void); // Byte pointer so the source can be advanced by the number of bytes written.
+ while (size) {
+ ssize_t ret = write(fd, data, size);
+ if (ret < 1) UTIL_THROW(util::ErrnoException, "Write failed"); // Both -1 and a 0-byte write throw ErrnoException. NOTE(review): the message does not include fd or the remaining size.
+ data += ret;
+ size -= ret;
+ }
+}
+
+void RemoveOrThrow(const char *name) { // Deletes the file `name` via std::remove.
+ UTIL_THROW_IF(std::remove(name), util::ErrnoException, "Could not remove " << name); // A nonzero return from std::remove is the failure condition; UTIL_THROW_IF is defined elsewhere and presumably throws util::ErrnoException.
+}
+
+} // namespace util
diff --git a/klm/util/file.hh b/klm/util/file.hh
new file mode 100644
index 00000000..d6cca41d
--- /dev/null
+++ b/klm/util/file.hh
@@ -0,0 +1,74 @@
+#ifndef UTIL_FILE__
+#define UTIL_FILE__
+
+#include <cstdio>
+#include <unistd.h>
+
+namespace util {
+
+class scoped_fd { // Holds a file descriptor and closes it in the destructor (defined in file.cc); -1 is the "holds nothing" value.
+ public:
+ scoped_fd() : fd_(-1) {} // Starts out holding no descriptor.
+
+ explicit scoped_fd(int fd) : fd_(fd) {} // Takes over an already-open descriptor.
+
+ ~scoped_fd(); // Closes fd_ unless it is -1; see file.cc.
+
+ void reset(int to) { // Replaces the held descriptor with `to`.
+ scoped_fd other(fd_); // Temporary holds the old descriptor so its destructor runs when reset() returns.
+ fd_ = to;
+ }
+
+ int get() const { return fd_; } // Returns the descriptor without giving it up.
+
+ int operator*() const { return fd_; } // Same value as get().
+
+ int release() { // Returns the descriptor and stops holding it.
+ int ret = fd_;
+ fd_ = -1; // After this the destructor will not call close().
+ return ret;
+ }
+
+ operator bool() { return fd_ != -1; } // True when a descriptor is held. NOTE(review): non-const and implicit, so it cannot be used on a const scoped_fd and allows implicit integer conversions.
+
+ private:
+ int fd_;
+
+ scoped_fd(const scoped_fd &); // Declared private; no definition appears in this diff, which blocks copying from outside the class.
+ scoped_fd &operator=(const scoped_fd &); // Declared private; no definition appears in this diff.
+};
+
+class scoped_FILE { // Holds a std::FILE* and fcloses it in the destructor (defined in file.cc). NOTE(review): no copy constructor or copy assignment is declared, so the implicit ones would let two objects hold - and close - the same FILE*.
+ public:
+ explicit scoped_FILE(std::FILE *file = NULL) : file_(file) {} // Defaults to holding no stream.
+
+ ~scoped_FILE(); // Calls fclose on file_ when it is non-null; see file.cc.
+
+ std::FILE *get() { return file_; } // Returns the stream without giving it up.
+ const std::FILE *get() const { return file_; }
+
+ void reset(std::FILE *to = NULL) { // Replaces the held stream with `to`.
+ scoped_FILE other(file_); // Temporary holds the old stream so its destructor runs when reset() returns.
+ file_ = to;
+ }
+
+ private:
+ std::FILE *file_;
+};
+
+int OpenReadOrThrow(const char *name); // Opens `name` with O_RDONLY and returns the descriptor (file.cc).
+
+int CreateOrThrow(const char *name); // Opens `name` with O_CREAT | O_TRUNC | O_RDWR and returns the descriptor (file.cc).
+
+// Return value for SizeFile when it can't size properly.
+const off_t kBadSize = -1;
+off_t SizeFile(int fd); // Returns st_size from fstat, or kBadSize (file.cc).
+
+void ReadOrThrow(int fd, void *to, std::size_t size); // Reads exactly `size` bytes into `to`, throwing on error or early EOF (file.cc).
+void WriteOrThrow(int fd, const void *data_void, std::size_t size); // Writes exactly `size` bytes, throwing when write() returns less than 1 (file.cc).
+
+void RemoveOrThrow(const char *name); // Deletes `name` with std::remove, throwing on a nonzero return (file.cc).
+
+} // namespace util
+
+#endif // UTIL_FILE__
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index cbe4234f..b57582a0 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -1,6 +1,7 @@
#include "util/file_piece.hh"
#include "util/exception.hh"
+#include "util/file.hh"
#include <iostream>
#include <string>
@@ -21,11 +22,6 @@
namespace util {
-EndOfFileException::EndOfFileException() throw() {
- *this << "End of file";
-}
-EndOfFileException::~EndOfFileException() throw() {}
-
ParseNumberException::ParseNumberException(StringPiece value) throw() {
*this << "Could not parse \"" << value << "\" into a number";
}
@@ -40,18 +36,6 @@ GZException::GZException(void *file) {
// Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).
const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-int OpenReadOrThrow(const char *name) {
- int ret;
- UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name);
- return ret;
-}
-
-off_t SizeFile(int fd) {
- struct stat sb;
- if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
- return sb.st_size;
-}
-
FilePiece::FilePiece(const char *name, std::ostream *show_progress, off_t min_buffer) :
file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(sysconf(_SC_PAGE_SIZE)),
progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) {
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index a5c00910..a627f38c 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -3,9 +3,9 @@
#include "util/ersatz_progress.hh"
#include "util/exception.hh"
+#include "util/file.hh"
#include "util/have.hh"
#include "util/mmap.hh"
-#include "util/scoped.hh"
#include "util/string_piece.hh"
#include <string>
@@ -14,12 +14,6 @@
namespace util {
-class EndOfFileException : public Exception {
- public:
- EndOfFileException() throw();
- ~EndOfFileException() throw();
-};
-
class ParseNumberException : public Exception {
public:
explicit ParseNumberException(StringPiece value) throw();
@@ -33,14 +27,8 @@ class GZException : public Exception {
~GZException() throw() {}
};
-int OpenReadOrThrow(const char *name);
-
extern const bool kSpaces[256];
-// Return value for SizeFile when it can't size properly.
-const off_t kBadSize = -1;
-off_t SizeFile(int fd);
-
// Memory backing the returned StringPiece may vanish on the next call.
class FilePiece {
public:
diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc
index e7c0643b..5ce7adc9 100644
--- a/klm/util/mmap.cc
+++ b/klm/util/mmap.cc
@@ -1,6 +1,6 @@
#include "util/exception.hh"
+#include "util/file.hh"
#include "util/mmap.hh"
-#include "util/scoped.hh"
#include <iostream>
@@ -66,20 +66,6 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int
return ret;
}
-namespace {
-void ReadAll(int fd, void *to_void, std::size_t amount) {
- uint8_t *to = static_cast<uint8_t*>(to_void);
- while (amount) {
- ssize_t ret = read(fd, to, amount);
- if (ret == -1) UTIL_THROW(ErrnoException, "Reading " << amount << " from fd " << fd << " failed.");
- if (ret == 0) UTIL_THROW(Exception, "Hit EOF in fd " << fd << " but there should be " << amount << " more bytes to read.");
- amount -= ret;
- to += ret;
- }
-}
-
-} // namespace
-
const int kFileFlags =
#ifdef MAP_FILE
MAP_FILE | MAP_SHARED
@@ -106,7 +92,7 @@ void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_m
out.reset(malloc(size), size, scoped_memory::MALLOC_ALLOCATED);
if (!out.get()) UTIL_THROW(util::ErrnoException, "Allocating " << size << " bytes with malloc");
if (-1 == lseek(fd, offset, SEEK_SET)) UTIL_THROW(ErrnoException, "lseek to " << offset << " in fd " << fd << " failed.");
- ReadAll(fd, out.get(), size);
+ ReadOrThrow(fd, out.get(), size);
break;
}
}
diff --git a/klm/util/mmap.hh b/klm/util/mmap.hh
index e4439fa4..b0eb6672 100644
--- a/klm/util/mmap.hh
+++ b/klm/util/mmap.hh
@@ -2,8 +2,6 @@
#define UTIL_MMAP__
// Utilities for mmaped files.
-#include "util/scoped.hh"
-
#include <cstddef>
#include <inttypes.h>
@@ -11,6 +9,8 @@
namespace util {
+class scoped_fd;
+
// (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.
class scoped_mmap {
public:
diff --git a/klm/util/murmur_hash.cc b/klm/util/murmur_hash.cc
index fec47fd9..d58a0727 100644
--- a/klm/util/murmur_hash.cc
+++ b/klm/util/murmur_hash.cc
@@ -1,129 +1,129 @@
-/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
- * code is released to the public domain. For business purposes, Murmurhash is
- * under the MIT license."
- * This is modified from the original:
- * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
- * length changed to unsigned int.
- * placed in namespace util
- * add MurmurHashNative
- * default option = 0 for seed
- */
-
-#include "util/murmur_hash.hh"
-
-namespace util {
-
-//-----------------------------------------------------------------------------
-// MurmurHash2, 64-bit versions, by Austin Appleby
-
-// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
-// and endian-ness issues if used across multiple platforms.
-
-// 64-bit hash for 64-bit platforms
-
-uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
-{
- const uint64_t m = 0xc6a4a7935bd1e995ULL;
- const int r = 47;
-
- uint64_t h = seed ^ (len * m);
-
- const uint64_t * data = (const uint64_t *)key;
- const uint64_t * end = data + (len/8);
-
- while(data != end)
- {
- uint64_t k = *data++;
-
- k *= m;
- k ^= k >> r;
- k *= m;
-
- h ^= k;
- h *= m;
- }
-
- const unsigned char * data2 = (const unsigned char*)data;
-
- switch(len & 7)
- {
- case 7: h ^= uint64_t(data2[6]) << 48;
- case 6: h ^= uint64_t(data2[5]) << 40;
- case 5: h ^= uint64_t(data2[4]) << 32;
- case 4: h ^= uint64_t(data2[3]) << 24;
- case 3: h ^= uint64_t(data2[2]) << 16;
- case 2: h ^= uint64_t(data2[1]) << 8;
- case 1: h ^= uint64_t(data2[0]);
- h *= m;
- };
-
- h ^= h >> r;
- h *= m;
- h ^= h >> r;
-
- return h;
-}
-
-
-// 64-bit hash for 32-bit platforms
-
-uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
-{
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
-
- unsigned int h1 = seed ^ len;
- unsigned int h2 = 0;
-
- const unsigned int * data = (const unsigned int *)key;
-
- while(len >= 8)
- {
- unsigned int k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
-
- unsigned int k2 = *data++;
- k2 *= m; k2 ^= k2 >> r; k2 *= m;
- h2 *= m; h2 ^= k2;
- len -= 4;
- }
-
- if(len >= 4)
- {
- unsigned int k1 = *data++;
- k1 *= m; k1 ^= k1 >> r; k1 *= m;
- h1 *= m; h1 ^= k1;
- len -= 4;
- }
-
- switch(len)
- {
- case 3: h2 ^= ((unsigned char*)data)[2] << 16;
- case 2: h2 ^= ((unsigned char*)data)[1] << 8;
- case 1: h2 ^= ((unsigned char*)data)[0];
- h2 *= m;
- };
-
- h1 ^= h2 >> 18; h1 *= m;
- h2 ^= h1 >> 22; h2 *= m;
- h1 ^= h2 >> 17; h1 *= m;
- h2 ^= h1 >> 19; h2 *= m;
-
- uint64_t h = h1;
-
- h = (h << 32) | h2;
-
- return h;
-}
-
-uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) {
- if (sizeof(int) == 4) {
- return MurmurHash64B(key, len, seed);
- } else {
- return MurmurHash64A(key, len, seed);
- }
-}
-
-} // namespace util
+/* Downloaded from http://sites.google.com/site/murmurhash/ which says "All
+ * code is released to the public domain. For business purposes, Murmurhash is
+ * under the MIT license."
+ * This is modified from the original:
+ * ULL tag on 0xc6a4a7935bd1e995 so this will compile on 32-bit.
+ * length changed to std::size_t.
+ * placed in namespace util
+ * add MurmurHashNative
+ * default option = 0 for seed
+ */
+
+#include "util/murmur_hash.hh"
+
+namespace util {
+
+//-----------------------------------------------------------------------------
+// MurmurHash2, 64-bit versions, by Austin Appleby
+
+// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
+// and endian-ness issues if used across multiple platforms.
+
+// 64-bit hash for 64-bit platforms
+
+uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed ) // Hashes `len` bytes at `key` to 64 bits, consuming the input eight bytes at a time.
+{
+ const uint64_t m = 0xc6a4a7935bd1e995ULL; // Multiplier used in every mixing step.
+ const int r = 47; // Shift distance used in every mixing step.
+
+ uint64_t h = seed ^ (len * m); // Initial state combines the seed with the length.
+
+ const uint64_t * data = (const uint64_t *)key; // Input is read through a uint64_t pointer, so alignment and byte order of `key` affect the result (see the caveat comment before this function).
+ const uint64_t * end = data + (len/8); // One past the last complete 8-byte word.
+
+ while(data != end)
+ {
+ uint64_t k = *data++;
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
+
+ const unsigned char * data2 = (const unsigned char*)data; // Remaining 0-7 bytes after the whole words.
+
+ switch(len & 7) // No break statements: each case falls through to all the cases below it.
+ {
+ case 7: h ^= uint64_t(data2[6]) << 48;
+ case 6: h ^= uint64_t(data2[5]) << 40;
+ case 5: h ^= uint64_t(data2[4]) << 32;
+ case 4: h ^= uint64_t(data2[3]) << 24;
+ case 3: h ^= uint64_t(data2[2]) << 16;
+ case 2: h ^= uint64_t(data2[1]) << 8;
+ case 1: h ^= uint64_t(data2[0]);
+ h *= m; // Runs only when at least one tail byte was present.
+ };
+
+ h ^= h >> r; // Final mixing of the accumulated state.
+ h *= m;
+ h ^= h >> r;
+
+ return h;
+}
+
+
+// 64-bit hash for 32-bit platforms
+
+uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed ) // Hashes `len` bytes at `key` to 64 bits using two unsigned int accumulators.
+{
+ const unsigned int m = 0x5bd1e995; // Multiplier used in every mixing step.
+ const int r = 24; // Shift distance used when mixing each input word.
+
+ unsigned int h1 = seed ^ len; // The size_t length is converted to unsigned int by this initialization.
+ unsigned int h2 = 0;
+
+ const unsigned int * data = (const unsigned int *)key; // Input is read through an unsigned int pointer.
+
+ while(len >= 8) // Each iteration consumes two words: the first goes into h1, the second into h2.
+ {
+ unsigned int k1 = *data++;
+ k1 *= m; k1 ^= k1 >> r; k1 *= m;
+ h1 *= m; h1 ^= k1;
+ len -= 4;
+
+ unsigned int k2 = *data++;
+ k2 *= m; k2 ^= k2 >> r; k2 *= m;
+ h2 *= m; h2 ^= k2;
+ len -= 4;
+ }
+
+ if(len >= 4) // One leftover whole word, mixed into h1.
+ {
+ unsigned int k1 = *data++;
+ k1 *= m; k1 ^= k1 >> r; k1 *= m;
+ h1 *= m; h1 ^= k1;
+ len -= 4;
+ }
+
+ switch(len) // At most 3 bytes remain here; no break statements, so each case falls through to the cases below it.
+ {
+ case 3: h2 ^= ((unsigned char*)data)[2] << 16;
+ case 2: h2 ^= ((unsigned char*)data)[1] << 8;
+ case 1: h2 ^= ((unsigned char*)data)[0];
+ h2 *= m; // Runs only when at least one tail byte was present.
+ };
+
+ h1 ^= h2 >> 18; h1 *= m; // Final cross-mixing between the two accumulators.
+ h2 ^= h1 >> 22; h2 *= m;
+ h1 ^= h2 >> 17; h1 *= m;
+ h2 ^= h1 >> 19; h2 *= m;
+
+ uint64_t h = h1;
+
+ h = (h << 32) | h2; // h1 becomes the upper 32 bits and h2 is OR-ed into the lower bits.
+
+ return h;
+}
+
+uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) { // Forwards to MurmurHash64B or MurmurHash64A depending on sizeof(int).
+ if (sizeof(int) == 4) { // NOTE(review): this selects on the size of int, not on pointer or word size, so 64B is used wherever int is 4 bytes - confirm intent; changing it would change hash values.
+ return MurmurHash64B(key, len, seed);
+ } else {
+ return MurmurHash64A(key, len, seed);
+ }
+}
+
+} // namespace util
diff --git a/klm/util/scoped.cc b/klm/util/scoped.cc
deleted file mode 100644
index a4cc5016..00000000
--- a/klm/util/scoped.cc
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "util/scoped.hh"
-
-#include <iostream>
-
-#include <stdlib.h>
-#include <unistd.h>
-
-namespace util {
-
-scoped_fd::~scoped_fd() {
- if (fd_ != -1 && close(fd_)) {
- std::cerr << "Could not close file " << fd_ << std::endl;
- abort();
- }
-}
-
-scoped_FILE::~scoped_FILE() {
- if (file_ && fclose(file_)) {
- std::cerr << "Could not close file " << std::endl;
- abort();
- }
-}
-
-} // namespace util
diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh
index d36a7df3..12e6652b 100644
--- a/klm/util/scoped.hh
+++ b/klm/util/scoped.hh
@@ -1,10 +1,11 @@
#ifndef UTIL_SCOPED__
#define UTIL_SCOPED__
-/* Other scoped objects in the style of scoped_ptr. */
+#include "util/exception.hh"
+/* Other scoped objects in the style of scoped_ptr. */
#include <cstddef>
-#include <cstdio>
+#include <cstdlib>
namespace util {
@@ -34,52 +35,33 @@ template <class T, class R, R (*Free)(T*)> class scoped_thing {
scoped_thing &operator=(const scoped_thing &);
};
-class scoped_fd {
+class scoped_malloc { // Holds a void* and passes it to std::free in the destructor.
public:
- scoped_fd() : fd_(-1) {}
+ scoped_malloc() : p_(NULL) {} // Starts out holding NULL.
- explicit scoped_fd(int fd) : fd_(fd) {}
+ scoped_malloc(void *p) : p_(p) {} // NOTE(review): not explicit, so any void* converts implicitly to scoped_malloc.
- ~scoped_fd();
+ ~scoped_malloc() { std::free(p_); } // Frees whatever pointer is currently held.
- void reset(int to) {
- scoped_fd other(fd_);
- fd_ = to;
+ void reset(void *p = NULL) { // Replaces the held pointer with `p`.
+ scoped_malloc other(p_); // Temporary holds the old pointer so it is freed when reset() returns.
+ p_ = p;
}
- int get() const { return fd_; }
-
- int operator*() const { return fd_; }
-
- int release() {
- int ret = fd_;
- fd_ = -1;
- return ret;
+ void call_realloc(std::size_t to) { // Resizes the held allocation to `to` bytes via std::realloc.
+ void *ret; // Result is kept separately so p_ is only overwritten after the NULL check.
+ UTIL_THROW_IF(!(ret = std::realloc(p_, to)), util::ErrnoException, "realloc to " << to << " bytes failed."); // NOTE(review): any NULL result is treated as failure, including for to == 0 - confirm callers never pass 0.
+ p_ = ret;
}
- private:
- int fd_;
-
- scoped_fd(const scoped_fd &);
- scoped_fd &operator=(const scoped_fd &);
-};
-
-class scoped_FILE {
- public:
- explicit scoped_FILE(std::FILE *file = NULL) : file_(file) {}
-
- ~scoped_FILE();
-
- std::FILE *get() { return file_; }
- const std::FILE *get() const { return file_; }
-
- void reset(std::FILE *to = NULL) {
- scoped_FILE other(file_);
- file_ = to;
- }
+ void *get() { return p_; } // Returns the pointer without giving it up.
+ const void *get() const { return p_; }
private:
- std::FILE *file_;
+ void *p_;
+
+ scoped_malloc(const scoped_malloc &); // Declared private; no definition appears in this diff, which blocks copying from outside the class.
+ scoped_malloc &operator=(const scoped_malloc &); // Declared private; no definition appears in this diff.
};
// Hat tip to boost.
diff --git a/klm/util/sized_iterator.hh b/klm/util/sized_iterator.hh
new file mode 100644
index 00000000..47dfc245
--- /dev/null
+++ b/klm/util/sized_iterator.hh
@@ -0,0 +1,107 @@
+#ifndef UTIL_SIZED_ITERATOR__
+#define UTIL_SIZED_ITERATOR__
+
+#include "util/proxy_iterator.hh"
+
+#include <functional>
+#include <string>
+
+#include <inttypes.h>
+#include <string.h>
+
+namespace util {
+
+class SizedInnerIterator { // Byte pointer paired with an entry size; arithmetic is done in units of whole entries.
+ public:
+ SizedInnerIterator() {} // NOTE(review): leaves ptr_ and size_ uninitialized.
+
+ SizedInnerIterator(void *ptr, std::size_t size) : ptr_(static_cast<uint8_t*>(ptr)), size_(size) {} // `size` is the number of bytes per entry.
+
+ bool operator==(const SizedInnerIterator &other) const { // Compares positions only; size_ is not compared.
+ return ptr_ == other.ptr_;
+ }
+ bool operator<(const SizedInnerIterator &other) const { // Orders by address only.
+ return ptr_ < other.ptr_;
+ }
+ SizedInnerIterator &operator+=(std::ptrdiff_t amount) { // Moves by `amount` entries, i.e. amount * size_ bytes.
+ ptr_ += amount * size_;
+ return *this;
+ }
+ std::ptrdiff_t operator-(const SizedInnerIterator &other) const { // Distance in entries: the byte difference divided by this iterator's size_.
+ return (ptr_ - other.ptr_) / size_;
+ }
+
+ const void *Data() const { return ptr_; } // Address of the current entry.
+ void *Data() { return ptr_; }
+ std::size_t EntrySize() const { return size_; } // Bytes per entry.
+
+ private:
+ uint8_t *ptr_;
+ std::size_t size_;
+};
+
+class SizedProxy { // Proxy for one fixed-size entry; assignment copies the entry's bytes rather than rebinding the proxy.
+ public:
+ SizedProxy() {} // Default-constructs inner_, whose members are left uninitialized by SizedInnerIterator().
+
+ SizedProxy(void *ptr, std::size_t size) : inner_(ptr, size) {} // Refers to the `size`-byte entry at `ptr`.
+
+ operator std::string() const { // Copies the entry's bytes out into a std::string of length EntrySize().
+ return std::string(reinterpret_cast<const char*>(inner_.Data()), inner_.EntrySize());
+ }
+
+ SizedProxy &operator=(const SizedProxy &from) { // Copies EntrySize() bytes from the other entry into this one; uses this proxy's entry size.
+ memcpy(inner_.Data(), from.inner_.Data(), inner_.EntrySize());
+ return *this;
+ }
+
+ SizedProxy &operator=(const std::string &from) { // Copies EntrySize() bytes from the string into this entry.
+ memcpy(inner_.Data(), from.data(), inner_.EntrySize()); // NOTE(review): from.size() is not checked; assumes the string holds at least EntrySize() bytes - confirm.
+ return *this;
+ }
+
+ const void *Data() const { return inner_.Data(); } // Address of the referenced entry.
+ void *Data() { return inner_.Data(); }
+
+ private:
+ friend class util::ProxyIterator<SizedProxy>; // ProxyIterator (util/proxy_iterator.hh) is given access to the private members below.
+
+ typedef std::string value_type; // Matches the std::string conversion and assignment above.
+
+ typedef SizedInnerIterator InnerIterator;
+
+ InnerIterator &Inner() { return inner_; }
+ const InnerIterator &Inner() const { return inner_; }
+ InnerIterator inner_;
+};
+
+typedef ProxyIterator<SizedProxy> SizedIterator;
+
+inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); } // Builds a SizedIterator from a SizedProxy for the `size`-byte entry at `ptr`.
+
+// Useful wrapper for a comparison function i.e. sort.
+template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> { // Adapts a comparator over raw data pointers so it accepts Proxy and std::string operands in any combination.
+ public:
+ explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {} // Stores a copy of the delegate.
+
+ bool operator()(const Proxy &first, const Proxy &second) const { // Each overload passes the operands' data pointers to the delegate.
+ return delegate_(first.Data(), second.Data());
+ }
+ bool operator()(const Proxy &first, const std::string &second) const {
+ return delegate_(first.Data(), second.data());
+ }
+ bool operator()(const std::string &first, const Proxy &second) const {
+ return delegate_(first.data(), second.Data());
+ }
+ bool operator()(const std::string &first, const std::string &second) const {
+ return delegate_(first.data(), second.data());
+ }
+
+ const Delegate &GetDelegate() const { return delegate_; } // Read-only access to the stored delegate.
+
+ private:
+ const Delegate delegate_;
+};
+
+} // namespace util
+#endif // UTIL_SIZED_ITERATOR__
diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh
new file mode 100644
index 00000000..ee1c7ab2
--- /dev/null
+++ b/klm/util/tokenize_piece.hh
@@ -0,0 +1,69 @@
+#ifndef UTIL_TOKENIZE_PIECE__
+#define UTIL_TOKENIZE_PIECE__
+
+#include "util/string_piece.hh"
+
+#include <boost/iterator/iterator_facade.hpp>
+
+/* Usage:
+ *
+ * for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) {
+ * std::cout << *i << "\n";
+ * }
+ *
+ */
+
+namespace util {
+
+// Tokenize a StringPiece using an iterator interface. boost::tokenizer doesn't work with StringPiece.
+template <char d> class PieceIterator : public boost::iterator_facade<PieceIterator<d>, const StringPiece, boost::forward_traversal_tag> { // Forward iterator over the pieces of a StringPiece separated by the character d; runs of d are skipped, so pieces are never empty.
+ public:
+ // Default construct is end, which is also returned by end().
+ PieceIterator() {}
+
+ explicit PieceIterator(const StringPiece &str)
+ : after_(str) {
+ increment(); // Positions the iterator on the first piece, or at the end state if there is none.
+ }
+
+ bool operator!() const { // True at the end state.
+ return after_.data() == 0;
+ }
+ operator bool() const { // True while a piece is available; lets the iterator serve as its own loop condition.
+ return after_.data() != 0;
+ }
+
+ static PieceIterator<d> end() { // A default-constructed iterator serves as the end value.
+ return PieceIterator<d>();
+ }
+
+ private:
+ friend class boost::iterator_core_access; // Grants iterator_facade access to increment, equal and dereference.
+
+ void increment() { // Advances current_ to the next piece found in after_.
+ const char *start = after_.data();
+ for (; (start != after_.data() + after_.size()) && (d == *start); ++start) {} // Skips leading delimiter characters.
+ if (start == after_.data() + after_.size()) {
+ // End condition.
+ after_.clear(); // Presumably nulls data(), which is what operator bool and equal() test - TODO confirm in string_piece.hh.
+ return;
+ }
+ const char *finish = start;
+ for (; (finish != after_.data() + after_.size()) && (d != *finish); ++finish) {} // Scans to the next delimiter or to the end of the input.
+ current_ = StringPiece(start, finish - start);
+ after_ = StringPiece(finish, after_.data() + after_.size() - finish); // Unconsumed remainder, starting at the delimiter (or at the end of the input).
+ }
+
+ bool equal(const PieceIterator &other) const { // Compares only the start of the unconsumed remainder; current_ is not compared.
+ return after_.data() == other.after_.data();
+ }
+
+ const StringPiece &dereference() const { return current_; }
+
+ StringPiece current_; // Piece returned by dereference().
+ StringPiece after_; // Input not yet consumed; data() == 0 marks the end state.
+};
+
+} // namespace util
+
+#endif // UTIL_TOKENIZE_PIECE__