summaryrefslogtreecommitdiff
path: root/klm/util/file_piece.hh
diff options
context:
space:
mode:
Diffstat (limited to 'klm/util/file_piece.hh')
-rw-r--r--klm/util/file_piece.hh54
1 files changed, 26 insertions, 28 deletions
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index af93d8aa..1b110287 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -4,19 +4,16 @@
#include "util/ersatz_progress.hh"
#include "util/exception.hh"
#include "util/file.hh"
-#include "util/have.hh"
#include "util/mmap.hh"
+#include "util/read_compressed.hh"
#include "util/string_piece.hh"
#include <cstddef>
+#include <iosfwd>
#include <string>
#include <stdint.h>
-#ifdef HAVE_ZLIB
-#include <zlib.h>
-#endif
-
namespace util {
class ParseNumberException : public Exception {
@@ -25,28 +22,26 @@ class ParseNumberException : public Exception {
~ParseNumberException() throw() {}
};
-class GZException : public Exception {
- public:
-#ifdef HAVE_ZLIB
- explicit GZException(gzFile file);
-#endif
- GZException() throw() {}
- ~GZException() throw() {}
-};
-
extern const bool kSpaces[256];
-// Memory backing the returned StringPiece may vanish on the next call.
+// Memory backing the returned StringPiece may vanish on the next call.
class FilePiece {
public:
- // 32 MB default.
- explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432);
- // Takes ownership of fd. name is used for messages.
- explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, std::size_t min_buffer = 33554432);
+ // 1 MB default.
+ explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+ // Takes ownership of fd. name is used for messages.
+ explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
+
+ /* Read from an istream. Don't use this if you can avoid it. Raw fd IO is
+ * much faster. But sometimes you just have an istream like Boost's HTTP
+ * server and want to parse it the same way.
+ * name is just used for messages and FileName().
+ */
+ explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
~FilePiece();
-
- char get() {
+
+ char get() {
if (position_ == position_end_) {
Shift();
if (at_end_) throw EndOfFileException();
@@ -54,14 +49,14 @@ class FilePiece {
return *(position_++);
}
- // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace().
+ // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace().
StringPiece ReadDelimited(const bool *delim = kSpaces) {
SkipSpaces(delim);
return Consume(FindDelimiterOrEOF(delim));
}
// Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
- // It is similar to getline in that way.
+ // It is similar to getline in that way.
StringPiece ReadLine(char delim = '\n');
float ReadFloat();
@@ -69,7 +64,10 @@ class FilePiece {
long int ReadLong();
unsigned long int ReadULong();
- // Skip spaces defined by isspace.
+ // Fake read() function. Reads up to limit bytes, returning the amount read. Returns 0 on EOF || limit == 0.
+ std::size_t Raw(void *to, std::size_t limit);
+
+ // Skip spaces defined by being in delim.
void SkipSpaces(const bool *delim = kSpaces) {
for (; ; ++position_) {
if (position_ == position_end_) Shift();
@@ -82,8 +80,10 @@ class FilePiece {
}
const std::string &FileName() const { return file_name_; }
-
+
private:
+ void InitializeNoRead(const char *name, std::size_t min_buffer);
+ // Calls InitializeNoRead, so don't call both.
void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
template <class T> T ReadNumber();
@@ -122,9 +122,7 @@ class FilePiece {
std::string file_name_;
-#ifdef HAVE_ZLIB
- gzFile gz_file_;
-#endif // HAVE_ZLIB
+ ReadCompressed fell_back_;
};
} // namespace util