summary refs log tree commit diff
path: root/klm/util
diff options
context:
space:
mode:
Diffstat (limited to 'klm/util')
-rw-r--r-- klm/util/file_piece.cc 28
-rw-r--r-- klm/util/file_piece.hh 5
2 files changed, 31 insertions, 2 deletions
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc
index 4d143857..9de30fc4 100644
--- a/klm/util/file_piece.cc
+++ b/klm/util/file_piece.cc
@@ -51,7 +51,7 @@ FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std:
FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
total_size_(kBadSize), page_(SizePage()) {
- InitializeNoRead("istream", min_buffer);
+ InitializeNoRead(name ? name : "istream", min_buffer);
fallback_to_read_ = true;
data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED);
@@ -95,6 +95,32 @@ unsigned long int FilePiece::ReadULong() {
return ReadNumber<unsigned long int>();
}
+std::size_t FilePiece::Raw(void *to, std::size_t limit) { // Copies up to limit bytes into to; returns the number copied (0 when limit == 0 or the read yields nothing).
+ if (!limit) return 0; // Nothing requested: leave all state untouched.
+ std::size_t in_buf = static_cast<std::size_t>(position_end_ - position_); // Bytes between position_ and position_end_.
+ if (in_buf) { // Serve from the buffer first; this may return fewer than limit bytes.
+ std::size_t amount = std::min(in_buf, limit);
+ memcpy(to, position_, amount);
+ position_ += amount; // Consume what was handed out.
+ return amount;
+ }
+ // Buffer is empty here: read directly into the caller's memory instead.
+ std::size_t read_return;
+ if (fallback_to_read_) { // Fallback mode: go through fell_back_.
+ read_return = fell_back_.Read(to, limit);
+ progress_.Set(fell_back_.RawAmount()); // Progress is set from fell_back_'s raw amount.
+ } else {
+ uint64_t desired_begin = mapped_offset_ + static_cast<uint64_t>(position_ - data_.begin()); // mapped_offset_ plus the distance consumed within data_.
+ SeekOrThrow(file_.get(), desired_begin); // NOTE(review): presumably positions file_ at desired_begin and throws on failure - confirm.
+ read_return = ReadOrEOF(file_.get(), to, limit);
+ // The existing note says we never rewind; adding the bytes just read to mapped_offset_ keeps desired_begin correct on the next call.
+ mapped_offset_ += static_cast<uint64_t>(read_return);
+ progress_ += read_return;
+ }
+ at_end_ |= (read_return == 0); // A zero-byte read sets at_end_; the flag is never cleared here.
+ return read_return;
+}
+
// Factored out so that istream can call this.
void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
file_name_ = name;
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index c07c6011..1b110287 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -64,7 +64,10 @@ class FilePiece {
long int ReadLong();
unsigned long int ReadULong();
- // Skip spaces defined by isspace.
+ // read()-like raw access: copies up to limit bytes into to and returns the number copied, which may be less than limit. Returns 0 on EOF or when limit == 0.
+ std::size_t Raw(void *to, std::size_t limit);
+
+ // Skip spaces defined by being in delim.
void SkipSpaces(const bool *delim = kSpaces) {
for (; ; ++position_) {
if (position_ == position_end_) Shift();