diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2013-03-03 12:06:43 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2013-03-03 12:06:43 +0100 |
commit | f7f9048f8e4d34682f17bfd050d238005feb3ee3 (patch) | |
tree | fa20fa16b0f5a8009a9254622b65ebeaec049399 /klm/util/file_piece.cc | |
parent | 9d306b30c9abba995ba35243e5cb461bb472a61f (diff) | |
parent | 12f2eab0e7dc7167af47cddf8ef88968656277da (diff) |
Merge branch 'master' of github.com:pks/cdec-dtrain
Diffstat (limited to 'klm/util/file_piece.cc')
-rw-r--r-- | klm/util/file_piece.cc | 48 |
1 files changed, 45 insertions, 3 deletions
```diff
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index fbfa0e0e..9de30fc4 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -49,6 +49,18 @@ FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std:
   Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
 }
 
+FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
+  total_size_(kBadSize), page_(SizePage()) {
+  InitializeNoRead(name ? name : "istream", min_buffer);
+
+  fallback_to_read_ = true;
+  data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
+  position_ = data_.begin();
+  position_end_ = position_;
+
+  fell_back_.Reset(stream);
+}
+
 FilePiece::~FilePiece() {}
 
 StringPiece FilePiece::ReadLine(char delim) {
@@ -83,7 +95,34 @@ unsigned long int FilePiece::ReadULong() {
   return ReadNumber<unsigned long int>();
 }
 
-void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
+std::size_t FilePiece::Raw(void *to, std::size_t limit) {
+  if (!limit) return 0;
+  std::size_t in_buf = static_cast<std::size_t>(position_end_ - position_);
+  if (in_buf) {
+    std::size_t amount = std::min(in_buf, limit);
+    memcpy(to, position_, amount);
+    position_ += amount;
+    return amount;
+  }
+
+  std::size_t read_return;
+  if (fallback_to_read_) {
+    read_return = fell_back_.Read(to, limit);
+    progress_.Set(fell_back_.RawAmount());
+  } else {
+    uint64_t desired_begin = mapped_offset_ + static_cast<uint64_t>(position_ - data_.begin());
+    SeekOrThrow(file_.get(), desired_begin);
+    read_return = ReadOrEOF(file_.get(), to, limit);
+    // Good thing we never rewind. This makes desired_begin calculate the right way the next time.
+    mapped_offset_ += static_cast<uint64_t>(read_return);
+    progress_ += read_return;
+  }
+  at_end_ |= (read_return == 0);
+  return read_return;
+}
+
+// Factored out so that istream can call this.
+void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
   file_name_ = name;
 
   default_map_size_ = page_ * std::max<std::size_t>((min_buffer / page_ + 1), 2);
@@ -91,6 +130,10 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s
   position_end_ = NULL;
   mapped_offset_ = 0;
   at_end_ = false;
+}
+
+void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
+  InitializeNoRead(name, min_buffer);
 
   if (total_size_ == kBadSize) {
     // So the assertion passes.
@@ -239,8 +282,7 @@ void FilePiece::TransitionToRead() {
   assert(!fallback_to_read_);
   fallback_to_read_ = true;
   data_.reset();
-  data_.reset(malloc(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
-  UTIL_THROW_IF(!data_.get(), ErrnoException, "malloc failed for " << default_map_size_);
+  data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
   position_ = data_.begin();
   position_end_ = position_;
 
```