summaryrefslogtreecommitdiff
path: root/klm/util/file_piece.hh
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2011-01-25 22:30:48 +0200
committerChris Dyer <cdyer@cs.cmu.edu>2011-01-25 22:30:48 +0200
commitc4ade3091b812ca135ae6520fa7173e1bbf28754 (patch)
tree2528af208f6dafd0c27dcbec0d2da291a9c93ca2 /klm/util/file_piece.hh
parentd04c0ca2d9df0e147239b18e90650ca8bd51d594 (diff)
update kenlm
Diffstat (limited to 'klm/util/file_piece.hh')
-rw-r--r--klm/util/file_piece.hh35
1 files changed, 28 insertions, 7 deletions
diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh
index b7697e71..f5249fcf 100644
--- a/klm/util/file_piece.hh
+++ b/klm/util/file_piece.hh
@@ -36,10 +36,13 @@ class GZException : public Exception {
int OpenReadOrThrow(const char *name);
+extern const bool kSpaces[256];
+
// Return value for SizeFile when it can't size properly.
const off_t kBadSize = -1;
off_t SizeFile(int fd);
+// Memory backing any StringPiece returned by a FilePiece method may vanish on the next call.
class FilePiece {
public:
// 32 MB default.
@@ -57,12 +60,12 @@ class FilePiece {
return *(position_++);
}
- // Memory backing the returned StringPiece may vanish on the next call.
- // Leaves the delimiter, if any, to be returned by get().
- StringPiece ReadDelimited() throw(GZException, EndOfFileException) {
- SkipSpaces();
- return Consume(FindDelimiterOrEOF());
+ // Skips leading delimiters, then consumes up to the next delimiter or EOF. Leaves that delimiter, if any, to be returned by get(). Delimiters are the bytes flagged true in delim (256-entry table); the default kSpaces is the isspace() set.
+ StringPiece ReadDelimited(const bool *delim = kSpaces) throw(GZException, EndOfFileException) {
+ SkipSpaces(delim); // advance position_ past any leading delimiter bytes
+ return Consume(FindDelimiterOrEOF(delim)); // hand Consume() the pointer to the next delimiter (or end of data)
}
+
// Unlike ReadDelimited, this includes leading spaces and consumes the delimiter.
// It is similar to getline in that way.
StringPiece ReadLine(char delim = '\n') throw(GZException, EndOfFileException);
@@ -72,7 +75,13 @@ class FilePiece {
long int ReadLong() throw(GZException, EndOfFileException, ParseNumberException);
unsigned long int ReadULong() throw(GZException, EndOfFileException, ParseNumberException);
- void SkipSpaces() throw (GZException, EndOfFileException);
+ // Advance position_ past every byte flagged true in delim; the default kSpaces is the isspace() set. Stops at the first non-flagged byte.
+ void SkipSpaces(const bool *delim = kSpaces) throw (GZException, EndOfFileException) {
+ for (; ; ++position_) { // no loop condition: exits via return below or an exception from Shift()
+ if (position_ == position_end_) Shift(); // current buffer exhausted; Shift() is declared to throw EndOfFileException/GZException
+ if (!delim[static_cast<unsigned char>(*position_)]) return; // cast keeps the table index non-negative for high-bit chars
+ }
+ }
off_t Offset() const {
return position_ - data_.begin() + mapped_offset_;
@@ -91,7 +100,19 @@ class FilePiece {
return ret;
}
- const char *FindDelimiterOrEOF() throw(EndOfFileException, GZException);
+ const char *FindDelimiterOrEOF(const bool *delim = kSpaces) throw (GZException, EndOfFileException) { // returns a pointer to the first byte at or after position_ flagged in delim, or position_end_ if none is found; does not advance position_ itself
+ for (const char *i = position_; i < position_end_; ++i) { // first pass: scan what is already buffered
+ if (delim[static_cast<unsigned char>(*i)]) return i; // cast keeps the table index non-negative for high-bit chars
+ }
+ while (!at_end_) { // nothing found yet; keep pulling in data until at_end_ is set
+ size_t skip = position_end_ - position_; // count of bytes already scanned, so they are not rescanned
+ Shift(); // NOTE(review): presumably refills/remaps the buffer and may move position_; body not visible here
+ for (const char *i = position_ + skip; i < position_end_; ++i) { // resume just past the scanned bytes, using the post-Shift position_
+ if (delim[static_cast<unsigned char>(*i)]) return i;
+ }
+ }
+ return position_end_; // no delimiter before the end of the data
+ }
void Shift() throw (EndOfFileException, GZException);
// Backends to Shift().