From 89238977fc9d8f8d9a6421b0d4f35afc200f08e7 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 28 Feb 2012 17:23:55 -0500 Subject: Subject: where's my kenlm update?? From: Chris Dyer --- klm/util/mmap.cc | 123 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 98 insertions(+), 25 deletions(-) (limited to 'klm/util/mmap.cc') diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index 279bafa8..a329ce4e 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -1,23 +1,63 @@ +/* Memory mapping wrappers. + * ARM and MinGW ports contributed by Hideo Okuma and Tomoyuki Yoshimura at + * NICT. + */ +#include "util/mmap.hh" + #include "util/exception.hh" #include "util/file.hh" -#include "util/mmap.hh" #include #include #include #include -#include +#include #include -#include + +#if defined(_WIN32) || defined(_WIN64) +#include +#include +#else +#include +#endif namespace util { +long SizePage() { +#if defined(_WIN32) || defined(_WIN64) + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwAllocationGranularity; +#else + return sysconf(_SC_PAGE_SIZE); +#endif +} + +void SyncOrThrow(void *start, size_t length) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap"); +#else + UTIL_THROW_IF(msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap"); +#endif +} + +void UnmapOrThrow(void *start, size_t length) { +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file"); +#else + UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed"); +#endif +} + scoped_mmap::~scoped_mmap() { if (data_ != (void*)-1) { - // Thanks Denis Filimonov for pointing out NFS likes msync first. - if (msync(data_, size_, MS_SYNC) || munmap(data_, size_)) { - std::cerr << "msync or mmap failed for " << size_ << " bytes." << std::endl; + try { + // Thanks Denis Filimonov for pointing out NFS likes msync first. + SyncOrThrow(data_, size_); + UnmapOrThrow(data_, size_); + } catch (const util::ErrnoException &e) { + std::cerr << e.what(); abort(); } } @@ -52,29 +92,40 @@ void scoped_memory::call_realloc(std::size_t size) { } } -void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, off_t offset) { +void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) { #ifdef MAP_POPULATE // Linux specific if (prefault) { flags |= MAP_POPULATE; } #endif +#if defined(_WIN32) || defined(_WIN64) + int protectC = for_write ? PAGE_READWRITE : PAGE_READONLY; + int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ; + uint64_t total_size = size + offset; + HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, total_size >> 32, static_cast(total_size), NULL); + UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed"); + LPVOID ret = MapViewOfFile(hMapping, protectM, offset >> 32, offset, size); + CloseHandle(hMapping); + UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed"); +#else int protect = for_write ? (PROT_READ | PROT_WRITE) : PROT_READ; void *ret = mmap(NULL, size, protect, flags, fd, offset); - if (ret == MAP_FAILED) { - UTIL_THROW(ErrnoException, "mmap failed for size " << size << " at offset " << offset); - } + UTIL_THROW_IF(ret == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset); +#endif return ret; } const int kFileFlags = -#ifdef MAP_FILE +#if defined(_WIN32) || defined(_WIN64) + 0 // MapOrThrow ignores flags on windows +#elif defined(MAP_FILE) MAP_FILE | MAP_SHARED #else MAP_SHARED #endif ; -void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_memory &out) { +void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) { switch (method) { case LAZY: out.reset(MapOrThrow(size, false, kFileFlags, false, fd, offset), size, scoped_memory::MMAP_ALLOCATED); @@ -91,30 +142,52 @@ void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_m case READ: out.reset(malloc(size), size, scoped_memory::MALLOC_ALLOCATED); if (!out.get()) UTIL_THROW(util::ErrnoException, "Allocating " << size << " bytes with malloc"); - if (-1 == lseek(fd, offset, SEEK_SET)) UTIL_THROW(ErrnoException, "lseek to " << offset << " in fd " << fd << " failed."); + SeekOrThrow(fd, offset); ReadOrThrow(fd, out.get(), size); break; } } -void *MapAnonymous(std::size_t size) { - return MapOrThrow(size, true, -#ifdef MAP_ANONYMOUS - MAP_ANONYMOUS // Linux +// Allocates zeroed memory in to. +void MapAnonymous(std::size_t size, util::scoped_memory &to) { + to.reset(); +#if defined(_WIN32) || defined(_WIN64) + to.reset(calloc(1, size), size, scoped_memory::MALLOC_ALLOCATED); +#else + to.reset(MapOrThrow(size, true, +# if defined(MAP_ANONYMOUS) + MAP_ANONYMOUS | MAP_PRIVATE // Linux +# else + MAP_ANON | MAP_PRIVATE // BSD +# endif + , false, -1, 0), size, scoped_memory::MMAP_ALLOCATED); +#endif +} + +void *MapZeroedWrite(int fd, std::size_t size) { + ResizeOrThrow(fd, 0); + ResizeOrThrow(fd, size); + return MapOrThrow(size, true, kFileFlags, false, fd, 0); +} + +namespace { + +int CreateOrThrow(const char *name) { + int ret; +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); #else - MAP_ANON // BSD + UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); #endif - | MAP_PRIVATE, false, -1, 0); + return ret; } +} // namespace + void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) { - file.reset(open(name, O_CREAT | O_RDWR | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)); - if (-1 == file.get()) - UTIL_THROW(ErrnoException, "Failed to open " << name << " for writing"); - if (-1 == ftruncate(file.get(), size)) - UTIL_THROW(ErrnoException, "ftruncate on " << name << " to " << size << " failed"); + file.reset(CreateOrThrow(name)); try { - return MapOrThrow(size, true, kFileFlags, false, file.get(), 0); + return MapZeroedWrite(file.get(), size); } catch (ErrnoException &e) { e << " in file " << name; throw; -- cgit v1.2.3 From 249301376865578b7f9678cc97c0f8b6f78623f6 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 9 Mar 2012 16:35:10 -0500 Subject: KenLM d45e2be including CreateOrThrow for Jon --- klm/lm/left.hh | 2 +- klm/util/file.cc | 10 ++++++++++ klm/util/file.hh | 3 +++ klm/util/mmap.cc | 14 -------------- 4 files changed, 14 insertions(+), 15 deletions(-) (limited to 'klm/util/mmap.cc') diff --git a/klm/lm/left.hh b/klm/lm/left.hh index 41f71f84..a07f9803 100644 --- a/klm/lm/left.hh +++ b/klm/lm/left.hh @@ -112,7 +112,7 @@ inline size_t hash_value(const ChartState &state) { size_t hashes[2]; hashes[0] = hash_value(state.left); hashes[1] = hash_value(state.right); - return util::MurmurHashNative(hashes, sizeof(size_t), state.full); + return util::MurmurHashNative(hashes, sizeof(size_t) * 2, state.full); } template class RuleScore { diff --git a/klm/util/file.cc b/klm/util/file.cc index aee7c77a..176737fa 100644 --- a/klm/util/file.cc +++ b/klm/util/file.cc @@ -42,6 +42,16 @@ int OpenReadOrThrow(const char *name) { return ret; } +int CreateOrThrow(const char *name) { + int ret; +#if defined(_WIN32) || defined(_WIN64) + UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); +#else + UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); +#endif + return ret; +} + uint64_t SizeFile(int fd) { #if defined(_WIN32) || defined(_WIN64) __int64 ret = _filelengthi64(fd); diff --git a/klm/util/file.hh b/klm/util/file.hh index 5c57e2a9..72c8ea76 100644 --- a/klm/util/file.hh +++ b/klm/util/file.hh @@ -65,7 +65,10 @@ class scoped_FILE { std::FILE *file_; }; +// Open for read only. int OpenReadOrThrow(const char *name); +// Create file if it doesn't exist, truncate if it does. Opened for write. +int CreateOrThrow(const char *name); // Return value for SizeFile when it can't size properly. const uint64_t kBadSize = (uint64_t)-1; diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index a329ce4e..3b1c58b8 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -170,20 +170,6 @@ void *MapZeroedWrite(int fd, std::size_t size) { return MapOrThrow(size, true, kFileFlags, false, fd, 0); } -namespace { - -int CreateOrThrow(const char *name) { - int ret; -#if defined(_WIN32) || defined(_WIN64) - UTIL_THROW_IF(-1 == (ret = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR, _S_IREAD | _S_IWRITE)), ErrnoException, "while creating " << name); -#else - UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)), ErrnoException, "while creating " << name); -#endif - return ret; -} - -} // namespace - void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) { file.reset(CreateOrThrow(name)); try { -- cgit v1.2.3