diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-01-25 22:30:48 +0200 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-01-25 22:30:48 +0200 |
commit | c4ade3091b812ca135ae6520fa7173e1bbf28754 (patch) | |
tree | 2528af208f6dafd0c27dcbec0d2da291a9c93ca2 /klm/util/file_piece.hh | |
parent | d04c0ca2d9df0e147239b18e90650ca8bd51d594 (diff) |
update kenlm
Diffstat (limited to 'klm/util/file_piece.hh')
-rw-r--r-- | klm/util/file_piece.hh | 35 |
1 files changed, 28 insertions, 7 deletions
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index b7697e71..f5249fcf 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -36,10 +36,13 @@ class GZException : public Exception { int OpenReadOrThrow(const char *name); +extern const bool kSpaces[256]; + // Return value for SizeFile when it can't size properly. const off_t kBadSize = -1; off_t SizeFile(int fd); +// Memory backing the returned StringPiece may vanish on the next call. class FilePiece { public: // 32 MB default. @@ -57,12 +60,12 @@ class FilePiece { return *(position_++); } - // Memory backing the returned StringPiece may vanish on the next call. - // Leaves the delimiter, if any, to be returned by get(). - StringPiece ReadDelimited() throw(GZException, EndOfFileException) { - SkipSpaces(); - return Consume(FindDelimiterOrEOF()); + // Leaves the delimiter, if any, to be returned by get(). Delimiters defined by isspace(). + StringPiece ReadDelimited(const bool *delim = kSpaces) throw(GZException, EndOfFileException) { + SkipSpaces(delim); + return Consume(FindDelimiterOrEOF(delim)); } + // Unlike ReadDelimited, this includes leading spaces and consumes the delimiter. // It is similar to getline in that way. StringPiece ReadLine(char delim = '\n') throw(GZException, EndOfFileException); @@ -72,7 +75,13 @@ class FilePiece { long int ReadLong() throw(GZException, EndOfFileException, ParseNumberException); unsigned long int ReadULong() throw(GZException, EndOfFileException, ParseNumberException); - void SkipSpaces() throw (GZException, EndOfFileException); + // Skip spaces defined by isspace. + void SkipSpaces(const bool *delim = kSpaces) throw (GZException, EndOfFileException) { + for (; ; ++position_) { + if (position_ == position_end_) Shift(); + if (!delim[static_cast<unsigned char>(*position_)]) return; + } + } off_t Offset() const { return position_ - data_.begin() + mapped_offset_; @@ -91,7 +100,19 @@ class FilePiece { return ret; } - const char *FindDelimiterOrEOF() throw(EndOfFileException, GZException); + const char *FindDelimiterOrEOF(const bool *delim = kSpaces) throw (GZException, EndOfFileException) { + for (const char *i = position_; i < position_end_; ++i) { + if (delim[static_cast<unsigned char>(*i)]) return i; + } + while (!at_end_) { + size_t skip = position_end_ - position_; + Shift(); + for (const char *i = position_ + skip; i < position_end_; ++i) { + if (delim[static_cast<unsigned char>(*i)]) return i; + } + } + return position_end_; + } void Shift() throw (EndOfFileException, GZException); // Backends to Shift(). |