diff options
Diffstat (limited to 'klm/util/file_piece.hh')
-rw-r--r-- | klm/util/file_piece.hh | 54 |
1 files changed, 26 insertions, 28 deletions
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index af93d8aa..1b110287 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -4,19 +4,16 @@ #include "util/ersatz_progress.hh" #include "util/exception.hh" #include "util/file.hh" -#include "util/have.hh" #include "util/mmap.hh" +#include "util/read_compressed.hh" #include "util/string_piece.hh" #include <cstddef> +#include <iosfwd> #include <string> #include <stdint.h> -#ifdef HAVE_ZLIB -#include <zlib.h> -#endif - namespace util { class ParseNumberException : public Exception { @@ -25,28 +22,26 @@ class ParseNumberException : public Exception { ~ParseNumberException() throw() {} }; -class GZException : public Exception { - public: -#ifdef HAVE_ZLIB - explicit GZException(gzFile file); -#endif - GZException() throw() {} - ~GZException() throw() {} -}; - extern const bool kSpaces[256]; -// Memory backing the returned StringPiece may vanish on the next call. +// Memory backing the returned StringPiece may vanish on the next call. class FilePiece { public: - // 32 MB default. - explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432); - // Takes ownership of fd. name is used for messages. - explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432); + // 1 MB default. + explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); + // Takes ownership of fd. name is used for messages. + explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); + + /* Read from an istream. Don't use this if you can avoid it. Raw fd IO is + * much faster. But sometimes you just have an istream like Boost's HTTP + * server and want to parse it the same way. + * name is just used for messages and FileName(). + */ + explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576); ~FilePiece(); - - char get() { + + char get() { if (position_ == position_end_) { Shift(); if (at_end_) throw EndOfFileException(); @@ -54,14 +49,14 @@ class FilePiece { return *(position_++); } - // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace(). + // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace(). StringPiece ReadDelimited(const bool *delim = kSpaces) { SkipSpaces(delim); return Consume(FindDelimiterOrEOF(delim)); } // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter. - // It is similar to getline in that way. + // It is similar to getline in that way. StringPiece ReadLine(char delim = '\n'); float ReadFloat(); @@ -69,7 +64,10 @@ class FilePiece { long int ReadLong(); unsigned long int ReadULong(); - // Skip spaces defined by isspace. + // Fake read() function. Reads up to limit bytes, returning the amount read. Returns 0 on EOF || limit == 0. + std::size_t Raw(void *to, std::size_t limit); + + // Skip spaces defined by being in delim. void SkipSpaces(const bool *delim = kSpaces) { for (; ; ++position_) { if (position_ == position_end_) Shift(); @@ -82,8 +80,10 @@ class FilePiece { } const std::string &FileName() const { return file_name_; } - + private: + void InitializeNoRead(const char *name, std::size_t min_buffer); + // Calls InitializeNoRead, so don't call both. void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer); template <class T> T ReadNumber(); @@ -122,9 +122,7 @@ class FilePiece { std::string file_name_; -#ifdef HAVE_ZLIB - gzFile gz_file_; -#endif // HAVE_ZLIB + ReadCompressed fell_back_; }; } // namespace util |