diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2010-12-13 16:18:34 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2010-12-13 16:18:34 -0500 |
commit | be98f29f51350c24136c191f01af3fbfe340ef78 (patch) | |
tree | 2e104152110ca76b527147458050a41934e031f2 /klm/util/file_piece.cc | |
parent | 063c0623aaf5dad8d02e5eae5793c123cd7fc3fe (diff) |
new version of kenlm
Diffstat (limited to 'klm/util/file_piece.cc')
-rw-r--r-- | klm/util/file_piece.cc | 99 |
1 files changed, 67 insertions, 32 deletions
diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index e7bd8659..5a667ebb 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -2,12 +2,12 @@ #include "util/exception.hh" +#include <iostream> #include <string> #include <limits> #include <assert.h> #include <ctype.h> -#include <err.h> #include <fcntl.h> #include <stdlib.h> #include <sys/mman.h> @@ -27,7 +27,7 @@ EndOfFileException::EndOfFileException() throw() { EndOfFileException::~EndOfFileException() throw() {} ParseNumberException::ParseNumberException(StringPiece value) throw() { - *this << "Could not parse \"" << value << "\" into a float"; + *this << "Could not parse \"" << value << "\" into a number"; } GZException::GZException(void *file) { @@ -68,12 +68,52 @@ FilePiece::~FilePiece() { file_.release(); int ret; if (Z_OK != (ret = gzclose(gz_file_))) { - errx(1, "could not close file %s using zlib", file_name_.c_str()); + std::cerr << "could not close file " << file_name_ << " using zlib" << std::endl; + abort(); } } #endif } +StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileException) { + const char *start = position_; + do { + for (const char *i = start; i < position_end_; ++i) { + if (*i == delim) { + StringPiece ret(position_, i - position_); + position_ = i + 1; + return ret; + } + } + size_t skip = position_end_ - position_; + Shift(); + start = position_ + skip; + } while (!at_end_); + StringPiece ret(position_, position_end_ - position_); + position_ = position_end_; + return ret; +} + +float FilePiece::ReadFloat() throw(GZException, EndOfFileException, ParseNumberException) { + return ReadNumber<float>(); +} +double FilePiece::ReadDouble() throw(GZException, EndOfFileException, ParseNumberException) { + return ReadNumber<double>(); +} +long int FilePiece::ReadLong() throw(GZException, EndOfFileException, ParseNumberException) { + return ReadNumber<long int>(); +} +unsigned long int FilePiece::ReadULong() throw(GZException, EndOfFileException, ParseNumberException) { + return ReadNumber<unsigned long int>(); +} + +void FilePiece::SkipSpaces() throw (GZException, EndOfFileException) { + for (; ; ++position_) { + if (position_ == position_end_) Shift(); + if (!isspace(*position_)) return; + } +} + void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t min_buffer) throw (GZException) { #ifdef HAVE_ZLIB gz_file_ = NULL; @@ -108,14 +148,34 @@ void FilePiece::Initialize(const char *name, std::ostream *show_progress, off_t } } -float FilePiece::ReadFloat() throw(GZException, EndOfFileException, ParseNumberException) { +namespace { +void ParseNumber(const char *begin, char *&end, float &out) { +#ifdef sun + out = static_cast<float>(strtod(begin, &end)); +#else + out = strtof(begin, &end); +#endif +} +void ParseNumber(const char *begin, char *&end, double &out) { + out = strtod(begin, &end); +} +void ParseNumber(const char *begin, char *&end, long int &out) { + out = strtol(begin, &end, 10); +} +void ParseNumber(const char *begin, char *&end, unsigned long int &out) { + out = strtoul(begin, &end, 10); +} +} // namespace + +template <class T> T FilePiece::ReadNumber() throw(GZException, EndOfFileException, ParseNumberException) { SkipSpaces(); while (last_space_ < position_) { if (at_end_) { // Hallucinate a null off the end of the file. std::string buffer(position_, position_end_); char *end; - float ret = strtof(buffer.c_str(), &end); + T ret; + ParseNumber(buffer.c_str(), end, ret); if (buffer.c_str() == end) throw ParseNumberException(buffer); position_ += end - buffer.c_str(); return ret; @@ -123,19 +183,13 @@ float FilePiece::ReadFloat() throw(GZException, EndOfFileException, ParseNumberE Shift(); } char *end; - float ret = strtof(position_, &end); + T ret; + ParseNumber(position_, end, ret); if (end == position_) throw ParseNumberException(ReadDelimited()); position_ = end; return ret; } -void FilePiece::SkipSpaces() throw (GZException, EndOfFileException) { - for (; ; ++position_) { - if (position_ == position_end_) Shift(); - if (!isspace(*position_)) return; - } -} - const char *FilePiece::FindDelimiterOrEOF() throw (GZException, EndOfFileException) { for (const char *i = position_; i <= last_space_; ++i) { if (isspace(*i)) return i; @@ -150,25 +204,6 @@ const char *FilePiece::FindDelimiterOrEOF() throw (GZException, EndOfFileExcepti return position_end_; } -StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileException) { - const char *start = position_; - do { - for (const char *i = start; i < position_end_; ++i) { - if (*i == delim) { - StringPiece ret(position_, i - position_); - position_ = i + 1; - return ret; - } - } - size_t skip = position_end_ - position_; - Shift(); - start = position_ + skip; - } while (!at_end_); - StringPiece ret(position_, position_end_ - position_); - position_ = position_end_; - return ret; -} - void FilePiece::Shift() throw(GZException, EndOfFileException) { if (at_end_) { progress_.Finished(); |