diff options
author | Paul Baltescu <pauldb89@gmail.com> | 2013-02-21 14:13:55 +0000 |
---|---|---|
committer | Paul Baltescu <pauldb89@gmail.com> | 2013-02-21 14:13:55 +0000 |
commit | b5491898549c61bd799d199aa9178a8394a1ef69 (patch) | |
tree | fb2686a2aae03ff07bcdf4cd47e8c3191eff8d1e /klm/util/file_piece.cc | |
parent | 0187447a643c3ea262b13b3052cb1531990eafe6 (diff) | |
parent | c17d9c23d023a5c08656376944f636180f0a437b (diff) |
Merge branch 'master' of https://github.com/pauldb89/cdec
Diffstat (limited to 'klm/util/file_piece.cc')
-rw-r--r-- | klm/util/file_piece.cc | 114 |
1 files changed, 89 insertions, 25 deletions
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 5a208eff..9de30fc4 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -1,13 +1,15 @@ #include "util/file_piece.hh" +#include "util/double-conversion/double-conversion.h" #include "util/exception.hh" #include "util/file.hh" #include "util/mmap.hh" -#ifdef WIN32 + +#if defined(_WIN32) || defined(_WIN64) #include <io.h> #else #include <unistd.h> -#endif // WIN32 +#endif #include <iostream> #include <string> @@ -34,10 +36,29 @@ FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t Initialize(name, show_progress, min_buffer); } -FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : +namespace { +std::string NamePossiblyFind(int fd, const char *name) { + if (name) return name; + return NameFromFD(fd); +} +} // namespace + +FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { - Initialize(name, show_progress, min_buffer); + progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) { + Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer); +} + +FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) : + total_size_(kBadSize), page_(SizePage()) { + InitializeNoRead(name ? name : "istream", min_buffer); + + fallback_to_read_ = true; + data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); + position_ = data_.begin(); + position_end_ = position_; + + fell_back_.Reset(stream); } FilePiece::~FilePiece() {} @@ -74,7 +95,34 @@ unsigned long int FilePiece::ReadULong() { return ReadNumber<unsigned long int>(); } -void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) { +std::size_t FilePiece::Raw(void *to, std::size_t limit) { + if (!limit) return 0; + std::size_t in_buf = static_cast<std::size_t>(position_end_ - position_); + if (in_buf) { + std::size_t amount = std::min(in_buf, limit); + memcpy(to, position_, amount); + position_ += amount; + return amount; + } + + std::size_t read_return; + if (fallback_to_read_) { + read_return = fell_back_.Read(to, limit); + progress_.Set(fell_back_.RawAmount()); + } else { + uint64_t desired_begin = mapped_offset_ + static_cast<uint64_t>(position_ - data_.begin()); + SeekOrThrow(file_.get(), desired_begin); + read_return = ReadOrEOF(file_.get(), to, limit); + // Good thing we never rewind. This makes desired_begin calculate the right way the next time. + mapped_offset_ += static_cast<uint64_t>(read_return); + progress_ += read_return; + } + at_end_ |= (read_return == 0); + return read_return; +} + +// Factored out so that istream can call this. +void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) { file_name_ = name; default_map_size_ = page_ * std::max<std::size_t>((min_buffer / page_ + 1), 2); @@ -82,6 +130,10 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s position_end_ = NULL; mapped_offset_ = 0; at_end_ = false; +} + +void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) { + InitializeNoRead(name, min_buffer); if (total_size_ == kBadSize) { // So the assertion passes. @@ -103,21 +155,33 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::s } namespace { -void ParseNumber(const char *begin, char *&end, float &out) { -#if defined(sun) || defined(WIN32) - out = static_cast<float>(strtod(begin, &end)); -#else - out = strtof(begin, &end); -#endif + +static const double_conversion::StringToDoubleConverter kConverter( + double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES, + std::numeric_limits<double>::quiet_NaN(), + std::numeric_limits<double>::quiet_NaN(), + "inf", + "NaN"); + +void ParseNumber(const char *begin, const char *&end, float &out) { + int count; + out = kConverter.StringToFloat(begin, end - begin, &count); + end = begin + count; } -void ParseNumber(const char *begin, char *&end, double &out) { - out = strtod(begin, &end); +void ParseNumber(const char *begin, const char *&end, double &out) { + int count; + out = kConverter.StringToDouble(begin, end - begin, &count); + end = begin + count; } -void ParseNumber(const char *begin, char *&end, long int &out) { - out = strtol(begin, &end, 10); +void ParseNumber(const char *begin, const char *&end, long int &out) { + char *silly_end; + out = strtol(begin, &silly_end, 10); + end = silly_end; } -void ParseNumber(const char *begin, char *&end, unsigned long int &out) { - out = strtoul(begin, &end, 10); +void ParseNumber(const char *begin, const char *&end, unsigned long int &out) { + char *silly_end; + out = strtoul(begin, &silly_end, 10); + end = silly_end; } } // namespace @@ -127,16 +191,17 @@ template <class T> T FilePiece::ReadNumber() { if (at_end_) { // Hallucinate a null off the end of the file. std::string buffer(position_, position_end_); - char *end; + const char *buf = buffer.c_str(); + const char *end = buf + buffer.size(); T ret; - ParseNumber(buffer.c_str(), end, ret); - if (buffer.c_str() == end) throw ParseNumberException(buffer); - position_ += end - buffer.c_str(); + ParseNumber(buf, end, ret); + if (buf == end) throw ParseNumberException(buffer); + position_ += end - buf; return ret; } Shift(); } - char *end; + const char *end = last_space_; T ret; ParseNumber(position_, end, ret); if (end == position_) throw ParseNumberException(ReadDelimited()); @@ -217,8 +282,7 @@ void FilePiece::TransitionToRead() { assert(!fallback_to_read_); fallback_to_read_ = true; data_.reset(); - data_.reset(malloc(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); - UTIL_THROW_IF(!data_.get(), ErrnoException, "malloc failed for " << default_map_size_); + data_.reset(MallocOrThrow(default_map_size_), default_map_size_, scoped_memory::MALLOC_ALLOCATED); position_ = data_.begin(); position_end_ = position_; |