diff options
Diffstat (limited to 'klm/util')
| -rw-r--r-- | klm/util/Makefile.am | 4 | ||||
| -rw-r--r-- | klm/util/bit_packing.hh | 14 | ||||
| -rw-r--r-- | klm/util/exception.cc | 5 | ||||
| -rw-r--r-- | klm/util/exception.hh | 6 | ||||
| -rw-r--r-- | klm/util/file.cc | 74 | ||||
| -rw-r--r-- | klm/util/file.hh | 74 | ||||
| -rw-r--r-- | klm/util/file_piece.cc | 18 | ||||
| -rw-r--r-- | klm/util/file_piece.hh | 14 | ||||
| -rw-r--r-- | klm/util/mmap.cc | 18 | ||||
| -rw-r--r-- | klm/util/mmap.hh | 4 | ||||
| -rw-r--r-- | klm/util/scoped.cc | 24 | ||||
| -rw-r--r-- | klm/util/scoped.hh | 58 | ||||
| -rw-r--r-- | klm/util/sized_iterator.hh | 107 | ||||
| -rw-r--r-- | klm/util/tokenize_piece.hh | 69 | 
14 files changed, 377 insertions, 112 deletions
diff --git a/klm/util/Makefile.am b/klm/util/Makefile.am index f4f7d158..a8d6299b 100644 --- a/klm/util/Makefile.am +++ b/klm/util/Makefile.am @@ -22,9 +22,9 @@ libklm_util_a_SOURCES = \    ersatz_progress.cc \    bit_packing.cc \    exception.cc \ +	file.cc \    file_piece.cc \    mmap.cc \ -  murmur_hash.cc \ -  scoped.cc +  murmur_hash.cc  AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. diff --git a/klm/util/bit_packing.hh b/klm/util/bit_packing.hh index 9f47d559..33266b94 100644 --- a/klm/util/bit_packing.hh +++ b/klm/util/bit_packing.hh @@ -86,6 +86,20 @@ inline void WriteFloat32(void *base, uint64_t bit_off, float value) {  const uint32_t kSignBit = 0x80000000; +inline void SetSign(float &to) { +  FloatEnc enc; +  enc.f = to; +  enc.i |= kSignBit; +  to = enc.f; +} + +inline void UnsetSign(float &to) { +  FloatEnc enc; +  enc.f = to; +  enc.i &= ~kSignBit; +  to = enc.f; +} +  inline float ReadNonPositiveFloat31(const void *base, uint64_t bit_off) {    FloatEnc encoded;    encoded.i = ReadOff(base, bit_off) >> BitPackShift(bit_off & 7, 31); diff --git a/klm/util/exception.cc b/klm/util/exception.cc index 62280970..96951495 100644 --- a/klm/util/exception.cc +++ b/klm/util/exception.cc @@ -79,4 +79,9 @@ ErrnoException::ErrnoException() throw() : errno_(errno) {  ErrnoException::~ErrnoException() throw() {} +EndOfFileException::EndOfFileException() throw() { +  *this << "End of file"; +} +EndOfFileException::~EndOfFileException() throw() {} +  } // namespace util diff --git a/klm/util/exception.hh b/klm/util/exception.hh index 81675a57..6d6a37cb 100644 --- a/klm/util/exception.hh +++ b/klm/util/exception.hh @@ -105,6 +105,12 @@ class ErrnoException : public Exception {      int errno_;  }; +class EndOfFileException : public Exception { +  public: +    EndOfFileException() throw(); +    ~EndOfFileException() throw(); +}; +  } // namespace util  #endif // UTIL_EXCEPTION__ diff --git a/klm/util/file.cc b/klm/util/file.cc new file mode 100644 index 00000000..d707568e --- /dev/null +++ b/klm/util/file.cc @@ -0,0 +1,74 @@ +#include "util/file.hh" + +#include "util/exception.hh" + +#include <cstdlib> +#include <cstdio> +#include <iostream> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> + +namespace util { + +scoped_fd::~scoped_fd() { +  if (fd_ != -1 && close(fd_)) { +    std::cerr << "Could not close file " << fd_ << std::endl; +    std::abort(); +  } +} + +scoped_FILE::~scoped_FILE() { +  if (file_ && std::fclose(file_)) { +    std::cerr << "Could not close file " << std::endl; +    std::abort(); +  } +} + +int OpenReadOrThrow(const char *name) { +  int ret; +  UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name); +  return ret; +} + +int CreateOrThrow(const char *name) { +  int ret; +  UTIL_THROW_IF(-1 == (ret = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR)), ErrnoException, "while creating " << name); +  return ret; +} + +off_t SizeFile(int fd) { +  struct stat sb; +  if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize; +  return sb.st_size; +} + +void ReadOrThrow(int fd, void *to_void, std::size_t amount) { +  uint8_t *to = static_cast<uint8_t*>(to_void); +  while (amount) { +    ssize_t ret = read(fd, to, amount); +    if (ret == -1) UTIL_THROW(ErrnoException, "Reading " << amount << " from fd " << fd << " failed."); +    if (ret == 0) UTIL_THROW(Exception, "Hit EOF in fd " << fd << " but there should be " << amount << " more bytes to read."); +    amount -= ret; +    to += ret; +  } +} + +void WriteOrThrow(int fd, const void *data_void, std::size_t size) { +  const uint8_t *data = static_cast<const uint8_t*>(data_void); +  while (size) { +    ssize_t ret = write(fd, data, size); +    if (ret < 1) UTIL_THROW(util::ErrnoException, "Write failed"); +    data += ret; +    size -= ret; +  } +} + +void RemoveOrThrow(const char *name) { +  UTIL_THROW_IF(std::remove(name), util::ErrnoException, "Could not remove " << name); +} + +} // namespace util diff --git a/klm/util/file.hh b/klm/util/file.hh new file mode 100644 index 00000000..d6cca41d --- /dev/null +++ b/klm/util/file.hh @@ -0,0 +1,74 @@ +#ifndef UTIL_FILE__ +#define UTIL_FILE__ + +#include <cstdio> +#include <unistd.h> + +namespace util { + +class scoped_fd { +  public: +    scoped_fd() : fd_(-1) {} + +    explicit scoped_fd(int fd) : fd_(fd) {} + +    ~scoped_fd(); + +    void reset(int to) { +      scoped_fd other(fd_); +      fd_ = to; +    } + +    int get() const { return fd_; } + +    int operator*() const { return fd_; } + +    int release() { +      int ret = fd_; +      fd_ = -1; +      return ret; +    } + +    operator bool() { return fd_ != -1; } + +  private: +    int fd_; + +    scoped_fd(const scoped_fd &); +    scoped_fd &operator=(const scoped_fd &); +}; + +class scoped_FILE { +  public: +    explicit scoped_FILE(std::FILE *file = NULL) : file_(file) {} + +    ~scoped_FILE(); + +    std::FILE *get() { return file_; } +    const std::FILE *get() const { return file_; } + +    void reset(std::FILE *to = NULL) { +      scoped_FILE other(file_); +      file_ = to; +    } + +  private: +    std::FILE *file_; +}; + +int OpenReadOrThrow(const char *name); + +int CreateOrThrow(const char *name); + +// Return value for SizeFile when it can't size properly.   +const off_t kBadSize = -1; +off_t SizeFile(int fd); + +void ReadOrThrow(int fd, void *to, std::size_t size); +void WriteOrThrow(int fd, const void *data_void, std::size_t size); + +void RemoveOrThrow(const char *name); + +} // namespace util + +#endif // UTIL_FILE__ diff --git a/klm/util/file_piece.cc b/klm/util/file_piece.cc index cbe4234f..b57582a0 100644 --- a/klm/util/file_piece.cc +++ b/klm/util/file_piece.cc @@ -1,6 +1,7 @@  #include "util/file_piece.hh"  #include "util/exception.hh" +#include "util/file.hh"  #include <iostream>  #include <string> @@ -21,11 +22,6 @@  namespace util { -EndOfFileException::EndOfFileException() throw() { -  *this << "End of file"; -} -EndOfFileException::~EndOfFileException() throw() {} -  ParseNumberException::ParseNumberException(StringPiece value) throw() {    *this << "Could not parse \"" << value << "\" into a number";  } @@ -40,18 +36,6 @@ GZException::GZException(void *file) {  // Sigh this is the only way I could come up with to do a _const_ bool.  It has ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace on C locale).   const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; -int OpenReadOrThrow(const char *name) { -  int ret; -  UTIL_THROW_IF(-1 == (ret = open(name, O_RDONLY)), ErrnoException, "while opening " << name); -  return ret; -} - -off_t SizeFile(int fd) { -  struct stat sb; -  if (fstat(fd, &sb) == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize; -  return sb.st_size; -} -  FilePiece::FilePiece(const char *name, std::ostream *show_progress, off_t min_buffer) :     file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(sysconf(_SC_PAGE_SIZE)),    progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) { diff --git a/klm/util/file_piece.hh b/klm/util/file_piece.hh index a5c00910..a627f38c 100644 --- a/klm/util/file_piece.hh +++ b/klm/util/file_piece.hh @@ -3,9 +3,9 @@  #include "util/ersatz_progress.hh"  #include "util/exception.hh" +#include "util/file.hh"  #include "util/have.hh"  #include "util/mmap.hh" -#include "util/scoped.hh"  #include "util/string_piece.hh"  #include <string> @@ -14,12 +14,6 @@  namespace util { -class EndOfFileException : public Exception { -  public: -    EndOfFileException() throw(); -    ~EndOfFileException() throw(); -}; -  class ParseNumberException : public Exception {    public:      explicit ParseNumberException(StringPiece value) throw(); @@ -33,14 +27,8 @@ class GZException : public Exception {      ~GZException() throw() {}  }; -int OpenReadOrThrow(const char *name); -  extern const bool kSpaces[256]; -// Return value for SizeFile when it can't size properly.   -const off_t kBadSize = -1; -off_t SizeFile(int fd); -  // Memory backing the returned StringPiece may vanish on the next call.    class FilePiece {    public: diff --git a/klm/util/mmap.cc b/klm/util/mmap.cc index e7c0643b..5ce7adc9 100644 --- a/klm/util/mmap.cc +++ b/klm/util/mmap.cc @@ -1,6 +1,6 @@  #include "util/exception.hh" +#include "util/file.hh"  #include "util/mmap.hh" -#include "util/scoped.hh"  #include <iostream> @@ -66,20 +66,6 @@ void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int    return ret;  } -namespace { -void ReadAll(int fd, void *to_void, std::size_t amount) { -  uint8_t *to = static_cast<uint8_t*>(to_void); -  while (amount) { -    ssize_t ret = read(fd, to, amount); -    if (ret == -1) UTIL_THROW(ErrnoException, "Reading " << amount << " from fd " << fd << " failed."); -    if (ret == 0) UTIL_THROW(Exception, "Hit EOF in fd " << fd << " but there should be " << amount << " more bytes to read."); -    amount -= ret; -    to += ret; -  } -} - -} // namespace -  const int kFileFlags =  #ifdef MAP_FILE    MAP_FILE | MAP_SHARED @@ -106,7 +92,7 @@ void MapRead(LoadMethod method, int fd, off_t offset, std::size_t size, scoped_m        out.reset(malloc(size), size, scoped_memory::MALLOC_ALLOCATED);        if (!out.get()) UTIL_THROW(util::ErrnoException, "Allocating " << size << " bytes with malloc");        if (-1 == lseek(fd, offset, SEEK_SET)) UTIL_THROW(ErrnoException, "lseek to " << offset << " in fd " << fd << " failed."); -      ReadAll(fd, out.get(), size); +      ReadOrThrow(fd, out.get(), size);        break;    }  } diff --git a/klm/util/mmap.hh b/klm/util/mmap.hh index e4439fa4..b0eb6672 100644 --- a/klm/util/mmap.hh +++ b/klm/util/mmap.hh @@ -2,8 +2,6 @@  #define UTIL_MMAP__  // Utilities for mmaped files.   -#include "util/scoped.hh" -  #include <cstddef>  #include <inttypes.h> @@ -11,6 +9,8 @@  namespace util { +class scoped_fd; +  // (void*)-1 is MAP_FAILED; this is done to avoid including the mmap header here.    class scoped_mmap {    public: diff --git a/klm/util/scoped.cc b/klm/util/scoped.cc deleted file mode 100644 index a4cc5016..00000000 --- a/klm/util/scoped.cc +++ /dev/null @@ -1,24 +0,0 @@ -#include "util/scoped.hh" - -#include <iostream> - -#include <stdlib.h> -#include <unistd.h> - -namespace util { - -scoped_fd::~scoped_fd() { -  if (fd_ != -1 && close(fd_)) { -    std::cerr << "Could not close file " << fd_ << std::endl; -    abort(); -  } -} - -scoped_FILE::~scoped_FILE() { -  if (file_ && fclose(file_)) { -    std::cerr << "Could not close file " << std::endl; -    abort(); -  } -} - -} // namespace util diff --git a/klm/util/scoped.hh b/klm/util/scoped.hh index d36a7df3..93e2e817 100644 --- a/klm/util/scoped.hh +++ b/klm/util/scoped.hh @@ -1,10 +1,11 @@  #ifndef UTIL_SCOPED__  #define UTIL_SCOPED__ -/* Other scoped objects in the style of scoped_ptr. */ +#include "util/exception.hh" +/* Other scoped objects in the style of scoped_ptr. */  #include <cstddef> -#include <cstdio> +#include <cstdlib>  namespace util { @@ -34,52 +35,33 @@ template <class T, class R, R (*Free)(T*)> class scoped_thing {      scoped_thing &operator=(const scoped_thing &);  }; -class scoped_fd { +class scoped_malloc {    public: -    scoped_fd() : fd_(-1) {} +    scoped_malloc() : p_(NULL) {} -    explicit scoped_fd(int fd) : fd_(fd) {} +    scoped_malloc(void *p) : p_(p) {} -    ~scoped_fd(); +    ~scoped_malloc() { std::free(p_); } -    void reset(int to) { -      scoped_fd other(fd_); -      fd_ = to; +    void reset(void *p = NULL) { +      scoped_malloc other(p_); +      p_ = p;      } -    int get() const { return fd_; } - -    int operator*() const { return fd_; } - -    int release() { -      int ret = fd_; -      fd_ = -1; -      return ret; +    void call_realloc(std::size_t to) { +      void *ret; +      UTIL_THROW_IF(!(ret = std::realloc(p_, to)) && to, util::ErrnoException, "realloc to " << to << " bytes failed."); +      p_ = ret;      } -  private: -    int fd_; - -    scoped_fd(const scoped_fd &); -    scoped_fd &operator=(const scoped_fd &); -}; - -class scoped_FILE { -  public: -    explicit scoped_FILE(std::FILE *file = NULL) : file_(file) {} - -    ~scoped_FILE(); - -    std::FILE *get() { return file_; } -    const std::FILE *get() const { return file_; } - -    void reset(std::FILE *to = NULL) { -      scoped_FILE other(file_); -      file_ = to; -    } +    void *get() { return p_; } +    const void *get() const { return p_; }    private: -    std::FILE *file_; +    void *p_; + +    scoped_malloc(const scoped_malloc &); +    scoped_malloc &operator=(const scoped_malloc &);  };  // Hat tip to boost.   diff --git a/klm/util/sized_iterator.hh b/klm/util/sized_iterator.hh new file mode 100644 index 00000000..47dfc245 --- /dev/null +++ b/klm/util/sized_iterator.hh @@ -0,0 +1,107 @@ +#ifndef UTIL_SIZED_ITERATOR__ +#define UTIL_SIZED_ITERATOR__ + +#include "util/proxy_iterator.hh" + +#include <functional> +#include <string> + +#include <inttypes.h> +#include <string.h> + +namespace util { + +class SizedInnerIterator { +  public: +    SizedInnerIterator() {} + +    SizedInnerIterator(void *ptr, std::size_t size) : ptr_(static_cast<uint8_t*>(ptr)), size_(size) {} + +    bool operator==(const SizedInnerIterator &other) const { +      return ptr_ == other.ptr_; +    } +    bool operator<(const SizedInnerIterator &other) const { +      return ptr_ < other.ptr_; +    } +    SizedInnerIterator &operator+=(std::ptrdiff_t amount) { +      ptr_ += amount * size_; +      return *this; +    } +    std::ptrdiff_t operator-(const SizedInnerIterator &other) const { +      return (ptr_ - other.ptr_) / size_; +    } + +    const void *Data() const { return ptr_; } +    void *Data() { return ptr_; } +    std::size_t EntrySize() const { return size_; } + +  private: +    uint8_t *ptr_; +    std::size_t size_; +}; + +class SizedProxy { +  public: +    SizedProxy() {} + +    SizedProxy(void *ptr, std::size_t size) : inner_(ptr, size) {} + +    operator std::string() const { +      return std::string(reinterpret_cast<const char*>(inner_.Data()), inner_.EntrySize()); +    } + +    SizedProxy &operator=(const SizedProxy &from) { +      memcpy(inner_.Data(), from.inner_.Data(), inner_.EntrySize()); +      return *this; +    } + +    SizedProxy &operator=(const std::string &from) { +      memcpy(inner_.Data(), from.data(), inner_.EntrySize()); +      return *this; +    } + +    const void *Data() const { return inner_.Data(); } +    void *Data() { return inner_.Data(); } + +  private: +    friend class util::ProxyIterator<SizedProxy>; + +    typedef std::string value_type; + +    typedef SizedInnerIterator InnerIterator; + +    InnerIterator &Inner() { return inner_; } +    const InnerIterator &Inner() const { return inner_; } +    InnerIterator inner_; +}; + +typedef ProxyIterator<SizedProxy> SizedIterator; + +inline SizedIterator SizedIt(void *ptr, std::size_t size) { return SizedIterator(SizedProxy(ptr, size)); } + +// Useful wrapper for a comparison function i.e. sort.   +template <class Delegate, class Proxy = SizedProxy> class SizedCompare : public std::binary_function<const Proxy &, const Proxy &, bool> { +  public: +    explicit SizedCompare(const Delegate &delegate = Delegate()) : delegate_(delegate) {} + +    bool operator()(const Proxy &first, const Proxy &second) const { +      return delegate_(first.Data(), second.Data()); +    } +    bool operator()(const Proxy &first, const std::string &second) const { +      return delegate_(first.Data(), second.data()); +    } +    bool operator()(const std::string &first, const Proxy &second) const { +      return delegate_(first.data(), second.Data()); +    } +    bool operator()(const std::string &first, const std::string &second) const { +      return delegate_(first.data(), second.data()); +    } + +    const Delegate &GetDelegate() const { return delegate_; } +     +  private: +    const Delegate delegate_; +}; + +} // namespace util +#endif // UTIL_SIZED_ITERATOR__ diff --git a/klm/util/tokenize_piece.hh b/klm/util/tokenize_piece.hh new file mode 100644 index 00000000..ee1c7ab2 --- /dev/null +++ b/klm/util/tokenize_piece.hh @@ -0,0 +1,69 @@ +#ifndef UTIL_TOKENIZE_PIECE__ +#define UTIL_TOKENIZE_PIECE__ + +#include "util/string_piece.hh" + +#include <boost/iterator/iterator_facade.hpp> + +/* Usage: + * + * for (PieceIterator<' '> i(" foo \r\n bar "); i; ++i) { + *   std::cout << *i << "\n"; + * } + * + */ + +namespace util { + +// Tokenize a StringPiece using an iterator interface.  boost::tokenizer doesn't work with StringPiece. +template <char d> class PieceIterator : public boost::iterator_facade<PieceIterator<d>, const StringPiece, boost::forward_traversal_tag> { +  public: +    // Default construct is end, which is also accessed by kEndPieceIterator; +    PieceIterator() {} + +    explicit PieceIterator(const StringPiece &str) +      : after_(str) { +        increment(); +      } + +    bool operator!() const { +      return after_.data() == 0; +    } +    operator bool() const { +      return after_.data() != 0; +    } + +    static PieceIterator<d> end() { +      return PieceIterator<d>(); +    } + +  private: +    friend class boost::iterator_core_access; + +    void increment() { +      const char *start = after_.data(); +      for (; (start != after_.data() + after_.size()) && (d == *start); ++start) {} +      if (start == after_.data() + after_.size()) { +        // End condition. +        after_.clear(); +        return; +      } +      const char *finish = start; +      for (; (finish != after_.data() + after_.size()) && (d != *finish); ++finish) {} +      current_ = StringPiece(start, finish - start); +      after_ = StringPiece(finish, after_.data() + after_.size() - finish); +    } + +    bool equal(const PieceIterator &other) const { +      return after_.data() == other.after_.data(); +    } + +    const StringPiece &dereference() const { return current_; } + +    StringPiece current_; +    StringPiece after_; +}; + +} // namespace util + +#endif // UTIL_TOKENIZE_PIECE__  | 
