diff options
| author | Patrick Simianer <p@simianer.de> | 2014-06-15 04:26:08 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2014-06-15 04:26:08 +0200 | 
| commit | 001804edef29549dc49b2adf6bab88cd7f24760a (patch) | |
| tree | bb75ae54b21e265383f24cc3cecae4c394a9cdda /klm/util | |
| parent | 244971287003d079e46193b8a209c28955f90134 (diff) | |
| parent | b4ce7c0b51d8615abf84c022ec3a981bee3277fe (diff) | |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'klm/util')
| -rw-r--r-- | klm/util/string_piece.cc | 193 | ||||
| -rw-r--r-- | klm/util/string_piece.hh | 270 | 
2 files changed, 2 insertions, 461 deletions
| diff --git a/klm/util/string_piece.cc b/klm/util/string_piece.cc index ec394b96..973091c4 100644 --- a/klm/util/string_piece.cc +++ b/klm/util/string_piece.cc @@ -1,193 +1,2 @@ -// Copyright 2004 The RE2 Authors.  All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in string_piece.hh. +// this has been moved to utils/ in cdec -#include "util/string_piece.hh" - -#include <algorithm> - -#include <limits.h> - -#ifndef HAVE_ICU - -typedef StringPiece::size_type size_type; - -void StringPiece::CopyToString(std::string* target) const { -  target->assign(ptr_, length_); -} - -size_type StringPiece::find(const StringPiece& s, size_type pos) const { -  // Not sure why length_ < 0 was here since it's std::size_t. -  if (/*length_ < 0 || */pos > static_cast<size_type>(length_)) -    return npos; - -  const char* result = std::search(ptr_ + pos, ptr_ + length_, -                                   s.ptr_, s.ptr_ + s.length_); -  const size_type xpos = result - ptr_; -  return xpos + s.length_ <= length_ ? xpos : npos; -} - -size_type StringPiece::find(char c, size_type pos) const { -  if (length_ <= 0 || pos >= static_cast<size_type>(length_)) { -    return npos; -  } -  const char* result = std::find(ptr_ + pos, ptr_ + length_, c); -  return result != ptr_ + length_ ? result - ptr_ : npos; -} - -size_type StringPiece::rfind(const StringPiece& s, size_type pos) const { -  if (length_ < s.length_) return npos; -  const size_t ulen = length_; -  if (s.length_ == 0) return std::min(ulen, pos); - -  const char* last = ptr_ + std::min(ulen - s.length_, pos) + s.length_; -  const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); -  return result != last ? result - ptr_ : npos; -} - -size_type StringPiece::rfind(char c, size_type pos) const { -  if (length_ <= 0) return npos; -  for (int i = std::min(pos, static_cast<size_type>(length_ - 1)); -       i >= 0; --i) { -    if (ptr_[i] == c) { -      return i; -    } -  } -  return npos; -} - -// For each character in characters_wanted, sets the index corresponding -// to the ASCII code of that character to 1 in table.  This is used by -// the find_.*_of methods below to tell whether or not a character is in -// the lookup table in constant time. -// The argument `table' must be an array that is large enough to hold all -// the possible values of an unsigned char.  Thus it should be be declared -// as follows: -//   bool table[UCHAR_MAX + 1] -static inline void BuildLookupTable(const StringPiece& characters_wanted, -                                    bool* table) { -  const size_type length = characters_wanted.length(); -  const char* const data = characters_wanted.data(); -  for (size_type i = 0; i < length; ++i) { -    table[static_cast<unsigned char>(data[i])] = true; -  } -} - -size_type StringPiece::find_first_of(const StringPiece& s, -                                     size_type pos) const { -  if (length_ == 0 || s.length_ == 0) -    return npos; - -  // Avoid the cost of BuildLookupTable() for a single-character search. -  if (s.length_ == 1) -    return find_first_of(s.ptr_[0], pos); - -  bool lookup[UCHAR_MAX + 1] = { false }; -  BuildLookupTable(s, lookup); -  for (size_type i = pos; i < length_; ++i) { -    if (lookup[static_cast<unsigned char>(ptr_[i])]) { -      return i; -    } -  } -  return npos; -} - -size_type StringPiece::find_first_not_of(const StringPiece& s, -                                         size_type pos) const { -  if (length_ == 0) -    return npos; - -  if (s.length_ == 0) -    return 0; - -  // Avoid the cost of BuildLookupTable() for a single-character search. -  if (s.length_ == 1) -    return find_first_not_of(s.ptr_[0], pos); - -  bool lookup[UCHAR_MAX + 1] = { false }; -  BuildLookupTable(s, lookup); -  for (size_type i = pos; i < length_; ++i) { -    if (!lookup[static_cast<unsigned char>(ptr_[i])]) { -      return i; -    } -  } -  return npos; -} - -size_type StringPiece::find_first_not_of(char c, size_type pos) const { -  if (length_ == 0) -    return npos; - -  for (; pos < length_; ++pos) { -    if (ptr_[pos] != c) { -      return pos; -    } -  } -  return npos; -} - -size_type StringPiece::find_last_of(const StringPiece& s, size_type pos) const { -  if (length_ == 0 || s.length_ == 0) -    return npos; - -  // Avoid the cost of BuildLookupTable() for a single-character search. -  if (s.length_ == 1) -    return find_last_of(s.ptr_[0], pos); - -  bool lookup[UCHAR_MAX + 1] = { false }; -  BuildLookupTable(s, lookup); -  for (size_type i = std::min(pos, length_ - 1); ; --i) { -    if (lookup[static_cast<unsigned char>(ptr_[i])]) -      return i; -    if (i == 0) -      break; -  } -  return npos; -} - -size_type StringPiece::find_last_not_of(const StringPiece& s, -                                        size_type pos) const { -  if (length_ == 0) -    return npos; - -  size_type i = std::min(pos, length_ - 1); -  if (s.length_ == 0) -    return i; - -  // Avoid the cost of BuildLookupTable() for a single-character search. -  if (s.length_ == 1) -    return find_last_not_of(s.ptr_[0], pos); - -  bool lookup[UCHAR_MAX + 1] = { false }; -  BuildLookupTable(s, lookup); -  for (; ; --i) { -    if (!lookup[static_cast<unsigned char>(ptr_[i])]) -      return i; -    if (i == 0) -      break; -  } -  return npos; -} - -size_type StringPiece::find_last_not_of(char c, size_type pos) const { -  if (length_ == 0) -    return npos; - -  for (size_type i = std::min(pos, length_ - 1); ; --i) { -    if (ptr_[i] != c) -      return i; -    if (i == 0) -      break; -  } -  return npos; -} - -StringPiece StringPiece::substr(size_type pos, size_type n) const { -  if (pos > length_) pos = length_; -  if (n > length_ - pos) n = length_ - pos; -  return StringPiece(ptr_ + pos, n); -} - -const size_type StringPiece::npos = size_type(-1); - -#endif // !HAVE_ICU diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index 84431db1..696ca084 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -1,270 +1,2 @@ -/* If you use ICU in your program, then compile with -DHAVE_ICU -licui18n.  If - * you don't use ICU, then this will use the Google implementation from Chrome. - * This has been modified from the original version to let you choose. - */ +#include "utils/string_piece.hh" -// Copyright 2008, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -//    * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -//    * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -//    * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Copied from strings/stringpiece.h with modifications -// -// A string-like object that points to a sized piece of memory. -// -// Functions or methods may use const StringPiece& parameters to accept either -// a "const char*" or a "string" value that will be implicitly converted to -// a StringPiece.  The implicit conversion means that it is often appropriate -// to include this .h file in other files rather than forward-declaring -// StringPiece as would be appropriate for most other Google classes. -// -// Systematic usage of StringPiece is encouraged as it will reduce unnecessary -// conversions from "const char*" to "string" and back again. -// - -#ifndef BASE_STRING_PIECE_H__ -#define BASE_STRING_PIECE_H__ - -#include "util/have.hh" - -#include <cstring> -#include <iosfwd> -#include <ostream> - -#ifdef HAVE_ICU -#include <unicode/stringpiece.h> -#include <unicode/uversion.h> - -// Old versions of ICU don't define operator== and operator!=. -#if (U_ICU_VERSION_MAJOR_NUM < 4) || ((U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM < 4)) -#warning You are using an old version of ICU.  Consider upgrading to ICU >= 4.6. -inline bool operator==(const StringPiece& x, const StringPiece& y) { -  if (x.size() != y.size()) -    return false; - -  return std::memcmp(x.data(), y.data(), x.size()) == 0; -} - -inline bool operator!=(const StringPiece& x, const StringPiece& y) { -  return !(x == y); -} -#endif // old version of ICU - -U_NAMESPACE_BEGIN - -inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) { -  int longersize = longer.size(), prefixsize = prefix.size(); -  return longersize >= prefixsize && std::memcmp(longer.data(), prefix.data(), prefixsize) == 0; -} - -#else - -#include <algorithm> -#include <cstddef> -#include <string> -#include <string.h> - -#ifdef WIN32 -#undef max -#undef min -#endif - -class StringPiece { - public: -  typedef size_t size_type; - - private: -  const char*   ptr_; -  size_type     length_; - - public: -  // We provide non-explicit singleton constructors so users can pass -  // in a "const char*" or a "string" wherever a "StringPiece" is -  // expected. -  StringPiece() : ptr_(NULL), length_(0) { } -  StringPiece(const char* str) -    : ptr_(str), length_((str == NULL) ? 0 : strlen(str)) { } -  StringPiece(const std::string& str) -    : ptr_(str.data()), length_(str.size()) { } -  StringPiece(const char* offset, size_type len) -    : ptr_(offset), length_(len) { } - -  // data() may return a pointer to a buffer with embedded NULs, and the -  // returned buffer may or may not be null terminated.  Therefore it is -  // typically a mistake to pass data() to a routine that expects a NUL -  // terminated string. -  const char* data() const { return ptr_; } -  size_type size() const { return length_; } -  size_type length() const { return length_; } -  bool empty() const { return length_ == 0; } - -  void clear() { ptr_ = NULL; length_ = 0; } -  void set(const char* data, size_type len) { ptr_ = data; length_ = len; } -  void set(const char* str) { -    ptr_ = str; -    length_ = str ? strlen(str) : 0; -  } -  void set(const void* data, size_type len) { -    ptr_ = reinterpret_cast<const char*>(data); -    length_ = len; -  } - -  char operator[](size_type i) const { return ptr_[i]; } - -  void remove_prefix(size_type n) { -    ptr_ += n; -    length_ -= n; -  } - -  void remove_suffix(size_type n) { -    length_ -= n; -  } - -  int compare(const StringPiece& x) const { -    int r = wordmemcmp(ptr_, x.ptr_, std::min(length_, x.length_)); -    if (r == 0) { -      if (length_ < x.length_) r = -1; -      else if (length_ > x.length_) r = +1; -    } -    return r; -  } - -  std::string as_string() const { -    // std::string doesn't like to take a NULL pointer even with a 0 size. -    return std::string(!empty() ? data() : "", size()); -  } - -  void CopyToString(std::string* target) const; -  void AppendToString(std::string* target) const; - -  // Does "this" start with "x" -  bool starts_with(const StringPiece& x) const { -    return ((length_ >= x.length_) && -            (wordmemcmp(ptr_, x.ptr_, x.length_) == 0)); -  } - -  // Does "this" end with "x" -  bool ends_with(const StringPiece& x) const { -    return ((length_ >= x.length_) && -            (wordmemcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); -  } - -  // standard STL container boilerplate -  typedef char value_type; -  typedef const char* pointer; -  typedef const char& reference; -  typedef const char& const_reference; -  typedef ptrdiff_t difference_type; -  static const size_type npos; -  typedef const char* const_iterator; -  typedef const char* iterator; -  typedef std::reverse_iterator<const_iterator> const_reverse_iterator; -  typedef std::reverse_iterator<iterator> reverse_iterator; -  iterator begin() const { return ptr_; } -  iterator end() const { return ptr_ + length_; } -  const_reverse_iterator rbegin() const { -    return const_reverse_iterator(ptr_ + length_); -  } -  const_reverse_iterator rend() const { -    return const_reverse_iterator(ptr_); -  } - -  size_type max_size() const { return length_; } -  size_type capacity() const { return length_; } - -  size_type copy(char* buf, size_type n, size_type pos = 0) const; - -  size_type find(const StringPiece& s, size_type pos = 0) const; -  size_type find(char c, size_type pos = 0) const; -  size_type rfind(const StringPiece& s, size_type pos = npos) const; -  size_type rfind(char c, size_type pos = npos) const; - -  size_type find_first_of(const StringPiece& s, size_type pos = 0) const; -  size_type find_first_of(char c, size_type pos = 0) const { -    return find(c, pos); -  } -  size_type find_first_not_of(const StringPiece& s, size_type pos = 0) const; -  size_type find_first_not_of(char c, size_type pos = 0) const; -  size_type find_last_of(const StringPiece& s, size_type pos = npos) const; -  size_type find_last_of(char c, size_type pos = npos) const { -    return rfind(c, pos); -  } -  size_type find_last_not_of(const StringPiece& s, size_type pos = npos) const; -  size_type find_last_not_of(char c, size_type pos = npos) const; - -  StringPiece substr(size_type pos, size_type n = npos) const; - -  static int wordmemcmp(const char* p, const char* p2, size_type N) { -    return std::memcmp(p, p2, N); -  } -}; - -inline bool operator==(const StringPiece& x, const StringPiece& y) { -  if (x.size() != y.size()) -    return false; - -  return std::memcmp(x.data(), y.data(), x.size()) == 0; -} - -inline bool operator!=(const StringPiece& x, const StringPiece& y) { -  return !(x == y); -} - -inline bool starts_with(const StringPiece& longer, const StringPiece& prefix) { -  return longer.starts_with(prefix); -} - -#endif // HAVE_ICU undefined - -inline bool operator<(const StringPiece& x, const StringPiece& y) { -  const int r = std::memcmp(x.data(), y.data(), -                                       std::min(x.size(), y.size())); -  return ((r < 0) || ((r == 0) && (x.size() < y.size()))); -} - -inline bool operator>(const StringPiece& x, const StringPiece& y) { -  return y < x; -} - -inline bool operator<=(const StringPiece& x, const StringPiece& y) { -  return !(x > y); -} - -inline bool operator>=(const StringPiece& x, const StringPiece& y) { -  return !(x < y); -} - -// allow StringPiece to be logged (needed for unit testing). -inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) { -  return o.write(piece.data(), static_cast<std::streamsize>(piece.size())); -} - -#ifdef HAVE_ICU -U_NAMESPACE_END -using U_NAMESPACE_QUALIFIER StringPiece; -#endif - -#endif  // BASE_STRING_PIECE_H__ | 
