From 47a656c0b6fdba8f91f2c5808234cbb1de682652 Mon Sep 17 00:00:00 2001 From: redpony Date: Wed, 10 Nov 2010 02:02:04 +0000 Subject: new version of klm git-svn-id: https://ws10smt.googlecode.com/svn/trunk@706 ec762483-ff6d-05da-a07a-a48fb63a330f --- klm/util/file_piece.hh | 51 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 16 deletions(-) (limited to 'klm/util/file_piece.hh') diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index 704f0ac6..11d4a751 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -11,6 +11,8 @@ #include +#define HAVE_ZLIB + namespace util { class EndOfFileException : public Exception { @@ -25,6 +27,13 @@ class ParseNumberException : public Exception { ~ParseNumberException() throw() {} }; +class GZException : public Exception { + public: + explicit GZException(void *file); + GZException() throw() {} + ~GZException() throw() {} +}; + int OpenReadOrThrow(const char *name); // Return value for SizeFile when it can't size properly. @@ -34,40 +43,42 @@ off_t SizeFile(int fd); class FilePiece { public: // 32 MB default. - explicit FilePiece(const char *file, std::ostream *show_progress = NULL, off_t min_buffer = 33554432); + explicit FilePiece(const char *file, std::ostream *show_progress = NULL, off_t min_buffer = 33554432) throw(GZException); // Takes ownership of fd. name is used for messages. - explicit FilePiece(const char *name, int fd, std::ostream *show_progress = NULL, off_t min_buffer = 33554432); + explicit FilePiece(int fd, const char *name, std::ostream *show_progress = NULL, off_t min_buffer = 33554432) throw(GZException); + + ~FilePiece(); - char get() throw(EndOfFileException) { - if (position_ == position_end_) Shift(); + char get() throw(GZException, EndOfFileException) { + if (position_ == position_end_) { + Shift(); + if (at_end_) throw EndOfFileException(); + } return *(position_++); } // Memory backing the returned StringPiece may vanish on the next call. // Leaves the delimiter, if any, to be returned by get(). - StringPiece ReadDelimited() throw(EndOfFileException) { + StringPiece ReadDelimited() throw(GZException, EndOfFileException) { SkipSpaces(); return Consume(FindDelimiterOrEOF()); } // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter. // It is similar to getline in that way. - StringPiece ReadLine(char delim = '\n') throw(EndOfFileException); + StringPiece ReadLine(char delim = '\n') throw(GZException, EndOfFileException); - float ReadFloat() throw(EndOfFileException, ParseNumberException); + float ReadFloat() throw(GZException, EndOfFileException, ParseNumberException); - void SkipSpaces() throw (EndOfFileException); + void SkipSpaces() throw (GZException, EndOfFileException); off_t Offset() const { return position_ - data_.begin() + mapped_offset_; } - // Only for testing. - void ForceFallbackToRead() { - fallback_to_read_ = true; - } + const std::string &FileName() const { return file_name_; } private: - void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer); + void Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) throw(GZException); StringPiece Consume(const char *to) { StringPiece ret(position_, to - position_); @@ -75,12 +86,14 @@ class FilePiece { return ret; } - const char *FindDelimiterOrEOF() throw(EndOfFileException); + const char *FindDelimiterOrEOF() throw(EndOfFileException, GZException); - void Shift() throw (EndOfFileException); + void Shift() throw (EndOfFileException, GZException); // Backends to Shift(). void MMapShift(off_t desired_begin) throw (); - void ReadShift(off_t desired_begin) throw (); + + void TransitionToRead() throw (GZException); + void ReadShift() throw (GZException, EndOfFileException); const char *position_, *last_space_, *position_end_; @@ -98,6 +111,12 @@ class FilePiece { bool fallback_to_read_; ErsatzProgress progress_; + + std::string file_name_; + +#ifdef HAVE_ZLIB + void *gz_file_; +#endif // HAVE_ZLIB }; } // namespace util -- cgit v1.2.3