summary | refs | log | tree | commit | diff
path: root/klm/util/file.cc
diff options
context:
space:
mode:
author Chris Dyer <redpony@gmail.com> 2014-10-13 00:42:37 -0400
committer Chris Dyer <redpony@gmail.com> 2014-10-13 00:42:37 -0400
commit b1ed81ef3216b212295afa76c5d20a56fb647204 (patch)
tree 9633cdc1b8a341dfa58b0b7fec0e2cae44d28835 /klm/util/file.cc
parent 1b17f61d359be6e1c3cea29f8c100db3bcdd73a0 (diff)
new kenlm
Diffstat (limited to 'klm/util/file.cc')
-rw-r--r-- klm/util/file.cc | 142
1 files changed, 93 insertions, 49 deletions
diff --git a/klm/util/file.cc b/klm/util/file.cc
index 51eaf972..aa61cf9a 100644
--- a/klm/util/file.cc
+++ b/klm/util/file.cc
@@ -5,28 +5,29 @@
#include "util/exception.hh"
+#include <algorithm>
#include <cstdlib>
#include <cstdio>
-#include <sstream>
#include <iostream>
+#include <limits>
+#include <sstream>
+
#include <assert.h>
#include <errno.h>
+#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdint.h>
-#if defined __MINGW32__
+#if defined(__MINGW32__)
#include <windows.h>
#include <unistd.h>
#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1. Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
#elif defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
-#include <algorithm>
-#include <limits.h>
-#include <limits>
#else
#include <unistd.h>
#endif
@@ -40,9 +41,9 @@ scoped_fd::~scoped_fd() {
}
}
-scoped_FILE::~scoped_FILE() {
- if (file_ && std::fclose(file_)) {
- std::cerr << "Could not close file " << std::endl;
+void scoped_FILE_closer::Close(std::FILE *file) {
+ if (file && std::fclose(file)) {
+ std::cerr << "Could not close file " << file << std::endl;
std::abort();
}
}
@@ -111,7 +112,7 @@ uint64_t SizeOrThrow(int fd) {
void ResizeOrThrow(int fd, uint64_t to) {
#if defined __MINGW32__
- // Does this handle 64-bit?
+ // Does this handle 64-bit?
int ret = ftruncate
#elif defined(_WIN32) || defined(_WIN64)
errno_t ret = _chsize_s
@@ -128,8 +129,10 @@ namespace {
std::size_t GuardLarge(std::size_t size) {
// The following operating systems have broken read/write/pread/pwrite that
// only supports up to 2^31.
+ // OS X man pages claim to support 64-bit, but Kareem M. Darwish had problems
+ // building with larger files, so APPLE is also here.
#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
- return std::min(static_cast<std::size_t>(static_cast<unsigned>(-1)), size);
+ return size < INT_MAX ? size : INT_MAX;
#else
return size;
#endif
@@ -172,13 +175,44 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
return amount;
}
-void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
- uint8_t *to = static_cast<uint8_t*>(to_void);
+void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
+ const uint8_t *data = static_cast<const uint8_t*>(data_void);
+ while (size) {
#if defined(_WIN32) || defined(_WIN64)
- UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
- const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
+ int ret;
+#else
+ ssize_t ret;
#endif
- for (;size ;) {
+ errno = 0;
+ do {
+ ret =
+#if defined(_WIN32) || defined(_WIN64)
+ _write
+#else
+ write
+#endif
+ (fd, data, GuardLarge(size));
+ } while (ret == -1 && errno == EINTR);
+ UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
+ data += ret;
+ size -= ret;
+ }
+}
+
+void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
+ if (!size) return;
+ UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
+}
+
+#if defined(_WIN32) || defined(_WIN64)
+namespace {
+const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
+} // namespace
+#endif
+
+void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
+ uint8_t *to = static_cast<uint8_t*>(to_void);
+ while (size) {
#if defined(_WIN32) || defined(_WIN64)
/* BROKEN: changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
// size_t might be 64-bit. DWORD is always 32.
@@ -192,16 +226,15 @@ void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
#else
ssize_t ret;
errno = 0;
- do {
- ret =
+ ret =
#ifdef OS_ANDROID
- pread64
+ pread64
#else
- pread
+ pread
#endif
- (fd, to, GuardLarge(size), off);
- } while (ret == -1 && errno == EINTR);
+ (fd, to, GuardLarge(size), off);
if (ret <= 0) {
+ if (ret == -1 && errno == EINTR) continue;
UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
}
@@ -212,34 +245,41 @@ void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
}
}
-void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
- const uint8_t *data = static_cast<const uint8_t*>(data_void);
- while (size) {
+void ErsatzPWrite(int fd, const void *from_void, std::size_t size, uint64_t off) {
+ const uint8_t *from = static_cast<const uint8_t*>(from_void);
+ while(size) {
#if defined(_WIN32) || defined(_WIN64)
- int ret;
+ /* Changes file pointer. Even if you save it and change it back, it won't be safe to use concurrently with write() or read() */
+ // size_t might be 64-bit. DWORD is always 32.
+ DWORD writing = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
+ DWORD ret;
+ OVERLAPPED overlapped;
+ memset(&overlapped, 0, sizeof(OVERLAPPED));
+ overlapped.Offset = static_cast<DWORD>(off);
+ overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
+ UTIL_THROW_IF(!WriteFile((HANDLE)_get_osfhandle(fd), from, writing, &ret, &overlapped), Exception, "WriteFile failed for offset " << off);
#else
ssize_t ret;
-#endif
errno = 0;
- do {
- ret =
-#if defined(_WIN32) || defined(_WIN64)
- _write
+ ret =
+#ifdef OS_ANDROID
+ pwrite64
#else
- write
+ pwrite
+#endif
+ (fd, from, GuardLarge(size), off);
+ if (ret <= 0) {
+ if (ret == -1 && errno == EINTR) continue;
+ UTIL_THROW_IF(ret == 0, EndOfFileException, " for writing " << size << " bytes at " << off << " from " << NameFromFD(fd));
+ UTIL_THROW_ARG(FDException, (fd), "while writing " << size << " bytes at offset " << off);
+ }
#endif
- (fd, data, GuardLarge(size));
- } while (ret == -1 && errno == EINTR);
- UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
- data += ret;
size -= ret;
+ off += ret;
+ from += ret;
}
}
-void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
- if (!size) return;
- UTIL_THROW_IF(1 != std::fwrite(data, size, 1, to), ErrnoException, "Short write; requested size " << size);
-}
void FSyncOrThrow(int fd) {
// Apparently windows doesn't have fsync?
@@ -443,8 +483,8 @@ void NormalizeTempPrefix(std::string &base) {
) base += '/';
}
-int MakeTemp(const std::string &base) {
- std::string name(base);
+int MakeTemp(const StringPiece &base) {
+ std::string name(base.data(), base.size());
name += "XXXXXX";
name.push_back(0);
int ret;
@@ -452,7 +492,7 @@ int MakeTemp(const std::string &base) {
return ret;
}
-std::FILE *FMakeTemp(const std::string &base) {
+std::FILE *FMakeTemp(const StringPiece &base) {
util::scoped_fd file(MakeTemp(base));
return FDOpenOrThrow(file);
}
@@ -478,14 +518,18 @@ bool TryName(int fd, std::string &out) {
if (-1 == lstat(name.c_str(), &sb))
return false;
out.resize(sb.st_size + 1);
- ssize_t ret = readlink(name.c_str(), &out[0], sb.st_size + 1);
- if (-1 == ret)
- return false;
- if (ret > sb.st_size) {
- // Increased in size?!
- return false;
+ // lstat gave us a size, but I've seen it grow, possibly due to symlinks on top of symlinks.
+ while (true) {
+ ssize_t ret = readlink(name.c_str(), &out[0], out.size());
+ if (-1 == ret)
+ return false;
+ if ((size_t)ret < out.size()) {
+ out.resize(ret);
+ break;
+ }
+ // Exponential growth.
+ out.resize(out.size() * 2);
}
- out.resize(ret);
// Don't use the non-file names.
if (!out.empty() && out[0] != '/')
return false;