From 75310799a6ee82b742ba69abab951a74fd0d19fc Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Wed, 9 Mar 2011 13:40:23 -0500 Subject: kenlm sync --- klm/util/exception.cc | 16 +++++++++++++- klm/util/exception.hh | 36 +++++++++++------------------- klm/util/file_piece.cc | 33 ++++++++++++++++++++-------- klm/util/file_piece.hh | 17 ++------------- klm/util/have.hh | 9 ++++++++ klm/util/string_piece.cc | 57 ------------------------------------------------ klm/util/string_piece.hh | 5 +---- 7 files changed, 64 insertions(+), 109 deletions(-) create mode 100644 klm/util/have.hh delete mode 100644 klm/util/string_piece.cc (limited to 'klm/util') diff --git a/klm/util/exception.cc b/klm/util/exception.cc index de6dd43c..077405f4 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -8,6 +8,20 @@ namespace util { Exception::Exception() throw() {} Exception::~Exception() throw() {} +Exception::Exception(const Exception &from) : std::exception() { + stream_.str(from.stream_.str()); +} + +Exception &Exception::operator=(const Exception &from) { + stream_.str(from.stream_.str()); + return *this; +} + +const char *Exception::what() const throw() { + text_ = stream_.str(); + return text_.c_str(); +} + namespace { // The XOPEN version. const char *HandleStrerror(int ret, const char *buf) { @@ -16,7 +30,7 @@ const char *HandleStrerror(int ret, const char *buf) { } // The GNU version. -const char *HandleStrerror(const char *ret, const char *buf) { +const char *HandleStrerror(const char *ret, const char * /*buf*/) { return ret; } } // namespace diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 124689cf..c6936914 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -9,24 +9,29 @@ namespace util { +template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); + class Exception : public std::exception { public: Exception() throw(); virtual ~Exception() throw(); - const char *what() const throw() { return what_.c_str(); } + Exception(const Exception &from); + Exception &operator=(const Exception &from); + + // Not threadsafe, but probably doesn't matter. FWIW, Boost's exception guidance implies that what() isn't threadsafe. + const char *what() const throw(); + + private: + template friend typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data); // This helps restrict operator<< defined below. template struct ExceptionTag { typedef T Identity; }; - std::string &Str() { - return what_; - } - - protected: - std::string what_; + std::stringstream stream_; + mutable std::string text_; }; /* This implements the normal operator<< for Exception and all its children. @@ -34,22 +39,7 @@ class Exception : public std::exception { * boost::enable_if. */ template typename Except::template ExceptionTag::Identity operator<<(Except &e, const Data &data) { - // Argh I had a stringstream in the exception, but the only way to get the string is by calling str(). But that's a temporary string, so virtual const char *what() const can't actually return it. - std::stringstream stream; - stream << data; - e.Str() += stream.str(); - return e; -} -template typename Except::template ExceptionTag::Identity operator<<(Except &e, const char *data) { - e.Str() += data; - return e; -} -template typename Except::template ExceptionTag::Identity operator<<(Except &e, const std::string &data) { - e.Str() += data; - return e; -} -template typename Except::template ExceptionTag::Identity operator<<(Except &e, const StringPiece &str) { - e.Str().append(str.data(), str.length()); + e.stream_ << data; return e; } diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index 81eb9bb9..67681f7e 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -79,22 +79,22 @@ FilePiece::~FilePiece() { } StringPiece FilePiece::ReadLine(char delim) throw (GZException, EndOfFileException) { - const char *start = position_; - do { - for (const char *i = start; i < position_end_; ++i) { + size_t skip = 0; + while (true) { + for (const char *i = position_ + skip; i < position_end_; ++i) { if (*i == delim) { StringPiece ret(position_, i - position_); position_ = i + 1; return ret; } } - size_t skip = position_end_ - position_; + if (at_end_) { + if (position_ == position_end_) Shift(); + return Consume(position_end_); + } + skip = position_end_ - position_; Shift(); - start = position_ + skip; - } while (!at_end_); - StringPiece ret(position_, position_end_ - position_); - position_ = position_end_; - return ret; + } } float FilePiece::ReadFloat() throw(GZException, EndOfFileException, ParseNumberException) { @@ -186,6 +186,21 @@ template T FilePiece::ReadNumber() throw(GZException, EndOfFileExcepti return ret; } +const char *FilePiece::FindDelimiterOrEOF(const bool *delim) throw (GZException, EndOfFileException) { + size_t skip = 0; + while (true) { + for (const char *i = position_ + skip; i < position_end_; ++i) { + if (delim[static_cast(*i)]) return i; + } + if (at_end_) { + if (position_ == position_end_) Shift(); + return position_end_; + } + skip = position_end_ - position_; + Shift(); + } +} + void FilePiece::Shift() throw(GZException, EndOfFileException) { if (at_end_) { progress_.Finished(); diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index f5249fcf..870ae5a3 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -3,6 +3,7 @@ #include "util/ersatz_progress.hh" #include "util/exception.hh" +#include "util/have.hh" #include "util/mmap.hh" #include "util/scoped.hh" #include "util/string_piece.hh" @@ -11,8 +12,6 @@ #include -#define HAVE_ZLIB - namespace util { class EndOfFileException : public Exception { @@ -100,19 +99,7 @@ class FilePiece { return ret; } - const char *FindDelimiterOrEOF(const bool *delim = kSpaces) throw (GZException, EndOfFileException) { - for (const char *i = position_; i < position_end_; ++i) { - if (delim[static_cast(*i)]) return i; - } - while (!at_end_) { - size_t skip = position_end_ - position_; - Shift(); - for (const char *i = position_ + skip; i < position_end_; ++i) { - if (delim[static_cast(*i)]) return i; - } - } - return position_end_; - } + const char *FindDelimiterOrEOF(const bool *delim = kSpaces) throw (GZException, EndOfFileException); void Shift() throw (EndOfFileException, GZException); // Backends to Shift(). diff --git a/klm/util/have.hh b/klm/util/have.hh new file mode 100644 index 00000000..7cf62008 --- /dev/null +++ b/klm/util/have.hh @@ -0,0 +1,9 @@ +/* Optional packages. You might want to integrate this with your build system e.g. config.h from ./configure. */ +#ifndef UTIL_HAVE__ +#define UTIL_HAVE__ + +#define HAVE_ZLIB +// #define HAVE_ICU +#define HAVE_BOOST + +#endif // UTIL_HAVE__ diff --git a/klm/util/string_piece.cc b/klm/util/string_piece.cc deleted file mode 100644 index 5b4e98f5..00000000 --- a/klm/util/string_piece.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Copied from strings/stringpiece.cc with modifications - -#include "util/string_piece.hh" - -#ifdef HAVE_BOOST -#include -#endif - -#include -#include - -#ifdef HAVE_ICU -U_NAMESPACE_BEGIN -#endif - -std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { - o.write(piece.data(), static_cast(piece.size())); - return o; -} - -#ifdef HAVE_BOOST -size_t hash_value(const StringPiece &str) { - return boost::hash_range(str.data(), str.data() + str.length()); -} -#endif - -#ifdef HAVE_ICU -U_NAMESPACE_END -#endif diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index 2583db5e..e48ce3d9 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -48,10 +48,7 @@ #ifndef BASE_STRING_PIECE_H__ #define BASE_STRING_PIECE_H__ -//Uncomment this line if you use ICU in your code. -//#define HAVE_ICU -//Uncomment this line if you want boost hashing for your StringPieces. -//#define HAVE_BOOST +#include "util/have.hh" #ifdef HAVE_BOOST #include -- cgit v1.2.3