From 2b63fa0755954edf467a2421997eaf72771260cf Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 16 May 2012 13:24:08 -0700 Subject: Big kenlm change includes lower order models for probing only. And other stuff. --- klm/util/file_piece.cc | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'klm/util/file_piece.cc') diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 081e662b..7b6a01dd 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -18,31 +18,35 @@ #include #include +#ifdef HAVE_ZLIB +#include +#endif + namespace util { ParseNumberException::ParseNumberException(StringPiece value) throw() { *this << "Could not parse \"" << value << "\" into a number"; } +GZException::GZException(void *file) { #ifdef HAVE_ZLIB -GZException::GZException(gzFile file) { int num; - *this << gzerror( file, &num) << " from zlib"; -} + *this << gzerror(file, &num) << " from zlib"; #endif // HAVE_ZLIB +} // Sigh this is the only way I could come up with to do a _const_ bool. It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale). const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { + progress_(total_size_, total_size_ == kBadSize ? 
NULL : show_progress, std::string("Reading ") + name) { Initialize(name, show_progress, min_buffer); } FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { + progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { Initialize(name, show_progress, min_buffer); } @@ -149,8 +153,9 @@ template T FilePiece::ReadNumber() { SkipSpaces(); while (last_space_ < position_) { if (at_end_) { + if (position_ >= position_end_) throw EndOfFileException(); // Hallucinate a null off the end of the file. - std::string buffer(position_, position_end_); + std::string buffer(position_, position_end_ - position_); char *end; T ret; ParseNumber(buffer.c_str(), end, ret); -- cgit v1.2.3 From 84b2894b639aa7de689a75dad8077639cbb03879 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 28 May 2012 12:29:05 -0400 Subject: Fix FilePiece gzip --- klm/util/file_piece.cc | 17 ++++++----------- klm/util/file_piece.hh | 10 ++++++++-- 2 files changed, 14 insertions(+), 13 deletions(-) (limited to 'klm/util/file_piece.cc') diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 7b6a01dd..081e662b 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -18,35 +18,31 @@ #include #include -#ifdef HAVE_ZLIB -#include <zlib.h> -#endif - namespace util { ParseNumberException::ParseNumberException(StringPiece value) throw() { *this << "Could not parse \"" << value << "\" into a number"; } -GZException::GZException(void *file) { #ifdef HAVE_ZLIB +GZException::GZException(gzFile file) { int num; - *this << gzerror(file, &num) << " from zlib"; -#endif // HAVE_ZLIB + *this << gzerror( file, &num) << " from zlib"; } +#endif // HAVE_ZLIB // Sigh this is the only way I could come up with to do a _const_ bool. 
It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale). const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { + progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { Initialize(name, show_progress, min_buffer); } FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { + progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { Initialize(name, show_progress, min_buffer); } @@ -153,9 +149,8 @@ template T FilePiece::ReadNumber() { SkipSpaces(); while (last_space_ < position_) { if (at_end_) { - if (position_ >= position_end_) throw EndOfFileException(); // Hallucinate a null off the end of the file. 
- std::string buffer(position_, position_end_ - position_); + std::string buffer(position_, position_end_); char *end; T ret; ParseNumber(buffer.c_str(), end, ret); diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index b81ac0e2..af93d8aa 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -13,6 +13,10 @@ #include +#ifdef HAVE_ZLIB +#include <zlib.h> +#endif + namespace util { class ParseNumberException : public Exception { @@ -23,7 +27,9 @@ class ParseNumberException : public Exception { class GZException : public Exception { public: - explicit GZException(void *file); +#ifdef HAVE_ZLIB + explicit GZException(gzFile file); +#endif GZException() throw() {} ~GZException() throw() {} }; @@ -117,7 +123,7 @@ class FilePiece { std::string file_name_; #ifdef HAVE_ZLIB - void *gz_file_; + gzFile gz_file_; #endif // HAVE_ZLIB }; -- cgit v1.2.3 From 7d1a63de4894de55f152bb806c85d42b745b9661 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Mon, 28 May 2012 12:33:16 -0400 Subject: Fix FilePiece and progress --- klm/util/file_piece.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'klm/util/file_piece.cc') diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 081e662b..a205995a 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -36,13 +36,13 @@ const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0 FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { + progress_(total_size_, total_size_ == kBadSize ? 
NULL : show_progress, std::string("Reading ") + name) { Initialize(name, show_progress, min_buffer); } FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) : file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()), - progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { + progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) { Initialize(name, show_progress, min_buffer); } -- cgit v1.2.3