From b35a7f3a96ff8ae42e15922dd6949bf9f5d15501 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Tue, 22 Jan 2013 21:37:49 +0000 Subject: KenLM 58da338b --- klm/util/file_piece.cc | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'klm/util/file_piece.cc') diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index fbfa0e0e..4d143857 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -49,6 +49,18 @@ FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std: Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer); } +FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) : + total_size_(kBadSize), page_(SizePage()) { + InitializeNoRead("istream", min_buffer); + + fallback_to_read_ = true; + data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); + position_ = data_.begin(); + position_end_ = position_; + + fell_back_.Reset(stream); +} + FilePiece::~FilePiece() {} StringPiece FilePiece::ReadLine(char delim) { @@ -83,7 +95,8 @@ unsigned long int FilePiece::ReadULong() { return ReadNumber(); } -void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) { +// Factored out so that istream can call this. +void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) { file_name_ = name; default_map_size_ = page_ * std::max((min_buffer / page_ + 1), 2); @@ -91,6 +104,10 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s position_end_ = NULL; mapped_offset_ = 0; at_end_ = false; +} + +void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) { + InitializeNoRead(name, min_buffer); if (total_size_ == kBadSize) { // So the assertion passes. @@ -239,8 +256,7 @@ void FilePiece::TransitionToRead() { assert(!fallback_to_read_); fallback_to_read_ = true; data_.reset(); - data_.reset(malloc(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); - UTIL_THROW_IF(!data_.get(), ErrnoException, "malloc failed for " << default_map_size_); + data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); position_ = data_.begin(); position_end_ = position_; -- cgit v1.2.3 From abf044fb2e5dcea5ba4dd54ca16c4e75e1c1295b Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 23 Jan 2013 21:46:03 +0000 Subject: Completely untested Raw read call --- klm/util/file_piece.cc | 28 +++++++++++++++++++++++++++- klm/util/file_piece.hh | 5 ++++- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'klm/util/file_piece.cc') diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 4d143857..9de30fc4 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -51,7 +51,7 @@ FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std: FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) : total_size_(kBadSize), page_(SizePage()) { - InitializeNoRead("istream", min_buffer); + InitializeNoRead(name ? name : "istream", min_buffer); fallback_to_read_ = true; data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); @@ -95,6 +95,32 @@ unsigned long int FilePiece::ReadULong() { return ReadNumber(); } +std::size_t FilePiece::Raw(void *to, std::size_t limit) { + if (!limit) return 0; + std::size_t in_buf = static_cast(position_end_ - position_); + if (in_buf) { + std::size_t amount = std::min(in_buf, limit); + memcpy(to, position_, amount); + position_ += amount; + return amount; + } + + std::size_t read_return; + if (fallback_to_read_) { + read_return = fell_back_.Read(to, limit); + progress_.Set(fell_back_.RawAmount()); + } else { + uint64_t desired_begin = mapped_offset_ + static_cast(position_ - data_.begin()); + SeekOrThrow(file_.get(), desired_begin); + read_return = ReadOrEOF(file_.get(), to, limit); + // Good thing we never rewind. This makes desired_begin calculate the right way the next time. + mapped_offset_ += static_cast(read_return); + progress_ += read_return; + } + at_end_ |= (read_return == 0); + return read_return; +} + // Factored out so that istream can call this. void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) { file_name_ = name; diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index c07c6011..1b110287 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -64,7 +64,10 @@ class FilePiece { long int ReadLong(); unsigned long int ReadULong(); - // Skip spaces defined by isspace. + // Fake read() function. Reads up to limit bytes, returning the amount read. Returns 0 on EOF || limit == 0. + std::size_t Raw(void *to, std::size_t limit); + + // Skip spaces defined by being in delim. void SkipSpaces(const bool *delim = kSpaces) { for (; ; ++position_) { if (position_ == position_end_) Shift(); -- cgit v1.2.3