diff options
Diffstat (limited to 'utils')
| -rwxr-xr-x | utils/indices_after.h | 10 | ||||
| -rw-r--r-- | utils/logval.h | 36 | ||||
| -rw-r--r-- | utils/sparse_vector.h | 136 | ||||
| -rwxr-xr-x | utils/string_to.h | 314 | ||||
| -rw-r--r-- | utils/warning_compiler.h | 15 | ||||
| -rw-r--r-- | utils/warning_pop.h | 8 | ||||
| -rw-r--r-- | utils/warning_push.h | 8 | 
7 files changed, 493 insertions, 34 deletions
| diff --git a/utils/indices_after.h b/utils/indices_after.h index 2891563c..62683f39 100755 --- a/utils/indices_after.h +++ b/utils/indices_after.h @@ -47,8 +47,8 @@ unsigned new_indices_keep_if_n(unsigned n,It i,KeepIf const& r,O out)  }  template <class KEEP,class O> -unsigned new_indices(KEEP keep,O out) { -  return new_indices(keep.begin(),keep.end(),out); +unsigned new_indices_keep(KEEP keep,O out) { +  return new_indices_keep(keep.begin(),keep.end(),out);  }  template <class V,class Out,class Permi> @@ -129,8 +129,10 @@ struct indices_after      if (n_mapped>0) {        map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped);        n_kept=new_indices_keep(i,end,map); -    } else +    } else { +      n_kept=0;        map=NULL; +    }    }    template <class A>    void init(A const& a) @@ -139,7 +141,7 @@ struct indices_after    }    template <class A> -  indices_after(A const& a) +  explicit indices_after(A const& a)    {      init(a.begin(),a.end());    } diff --git a/utils/logval.h b/utils/logval.h index 37f14ae5..b337cf0e 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -3,12 +3,13 @@  #define LOGVAL_CHECK_NEG false +#include <boost/functional/hash.hpp>  #include <iostream>  #include <cstdlib>  #include <cmath>  #include <limits> -template <typename T> +template <class T>  class LogVal {   public:    LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {} @@ -23,6 +24,11 @@ class LogVal {    static LogVal<T> e() { return LogVal(1,false); }    void logeq(const T& v) { s_ = false; v_ = v; } +  std::size_t hash_impl() const { +    using namespace boost; +    return hash_value(v_)+s_; +  } +    LogVal& operator+=(const LogVal& a) {      if (a.v_ == -std::numeric_limits<T>::infinity()) return *this;      if (a.s_ == s_) { @@ -98,31 +104,31 @@ class LogVal {  };  // copy elision - as opposed to explicit copy of LogVal<T> const& o1, we should be able to construct Logval r=a+(b+c) as a single result in place in r.  todo: return std::move(o1) - C++0x -template<typename T> +template<class T>  LogVal<T> operator+(LogVal<T> o1, const LogVal<T>& o2) {    o1 += o2;    return o1;  } -template<typename T> +template<class T>  LogVal<T> operator*(LogVal<T> o1, const LogVal<T>& o2) {    o1 *= o2;    return o1;  } -template<typename T> +template<class T>  LogVal<T> operator/(LogVal<T> o1, const LogVal<T>& o2) {    o1 /= o2;    return o1;  } -template<typename T> +template<class T>  LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) {    o1 -= o2;    return o1;  } -template<typename T> +template<class T>  T log(const LogVal<T>& o) {  #ifdef LOGVAL_CHECK_NEG    if (o.s_) return log(-1.0); @@ -130,12 +136,12 @@ T log(const LogVal<T>& o) {    return o.v_;  } -template <typename T> +template <class T>  LogVal<T> pow(const LogVal<T>& b, const T& e) {    return b.pow(e);  } -template <typename T> +template <class T>  bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {    if (lhs.s_ == rhs.s_) {      return (lhs.v_ < rhs.v_); @@ -145,28 +151,32 @@ bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {  }  #if 0 -template <typename T> +template <class T>  bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) {    return (lhs.v_ <= rhs.v_);  } -template <typename T> +template <class T>  bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) {    return (lhs.v_ > rhs.v_);  } -template <typename T> +template <class T>  bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {    return (lhs.v_ >= rhs.v_);  }  #endif -template <typename T> + +template <class T> +std::size_t hash_value(const LogVal<T>& x) { return x.hash_impl(); } + +template <class T>  bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) {    return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_);  } -template <typename T> +template <class T>  bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) {    return !(lhs == rhs);  } diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h index e8e9c2f7..7ac85d1d 100644 --- a/utils/sparse_vector.h +++ b/utils/sparse_vector.h @@ -1,16 +1,13 @@  #ifndef _SPARSE_VECTOR_H_  #define _SPARSE_VECTOR_H_ +/* +TODO: specialize for int value types, where it probably makes sense to check if adding/subtracting brings a value to 0, and remove it from the map (e.g. in a gibbs sampler).  or add a separate policy argument for that. + */ +  //#define SPARSE_VECTOR_HASH +// if defined, use hash_map rather than map.  map is probably faster/smaller for small vectors -#ifdef SPARSE_VECTOR_HASH -#include "hash.h" -# define SPARSE_VECTOR_MAP HASH_MAP -# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted) -#else -# define SPARSE_VECTOR_MAP std::map -# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) -#endif  /*     use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list)  */ @@ -38,6 +35,17 @@  // this is a modified version of code originally written  // by Phil Blunsom +#include <boost/functional/hash.hpp> +#include <stdexcept> +#ifdef SPARSE_VECTOR_HASH +#include "hash.h" +# define SPARSE_VECTOR_MAP HASH_MAP +# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted) +#else +# define SPARSE_VECTOR_MAP std::map +# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) +#endif +  #include <iostream>  #include <map>  #include <tr1/unordered_map> @@ -46,6 +54,7 @@  #include "fdict.h"  #include "small_vector.h" +#include "string_to.h"  template <class T>  inline T & extend_vector(std::vector<T> &v,int i) { @@ -54,7 +63,7 @@ inline T & extend_vector(std::vector<T> &v,int i) {    return v[i];  } -template <typename T> +template <class T>  class SparseVector {    void init_reserved() {      SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2); @@ -71,17 +80,97 @@ public:    SparseVector() {      init_reserved();    } +  typedef typename MapType::value_type value_type; +  typedef typename MapType::iterator iterator;    explicit SparseVector(std::vector<T> const& v) {      init_reserved(); -    typename MapType::iterator p=values_.begin(); +    iterator p=values_.begin();      const T z=0;      for (unsigned i=0;i<v.size();++i) {        T const& t=v[i];        if (t!=z) -        p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster +        p=values_.insert(p,value_type(i,t)); //hint makes insertion faster +    } +  } + +  typedef char const* Str; +  template <class O> +  void print(O &o,Str pre="",Str post="",Str kvsep="=",Str pairsep=" ") const { +    o << pre; +    bool first=true; +    for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) { +      if (first) +        first=false; +      else +        o<<pairsep; +      o<<FD::Convert(i->first)<<kvsep<<i->second;      } +    o << post;    } +  static void error(std::string const& msg) { +    throw std::runtime_error("SparseVector: "+msg); +  } + +  enum DupPolicy { +    NO_DUPS, +    KEEP_FIRST, +    KEEP_LAST, +    SUM +  }; + +  // either key val alternating whitespace sep, or key=val (kvsep char is '=').  end at eof or terminator (non-ws) char +  template <class S> +  void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool stop_at_terminator=false,char terminator=')') { +    values_.clear(); +    std::string id; +    WordID k; +    T v; +#undef SPARSE_MUST_READ +#define SPARSE_MUST_READ(x) if (!(x)) error(#x); +    int ki; +    while (s) { +      if (stop_at_terminator) { +        char c; +        if (!(s>>c)) goto eof; +        s.unget(); +        if (c==terminator) return; +      } +      if (!(s>>id)) goto eof; +      if (use_kvsep && (ki=id.find(kvsep))!=std::string::npos) { +        k=FD::Convert(std::string(id,0,ki)); +        string_into(id.c_str()+ki+1,v); +      } else { +        k=FD::Convert(id); +        if (!(s>>v)) error("reading value failed"); +      } +      std::pair<iterator,bool> vi=values_.insert(value_type(k,v)); +      if (vi.second) { +        T &oldv=vi.first->second; +        switch(dp) { +        case NO_DUPS: error("read duplicate key with NO_DUPS.  key=" +                            +FD::Convert(k)+" val="+to_string(v)+" old-val="+to_string(oldv)); +          break; +        case KEEP_FIRST: break; +        case KEEP_LAST: oldv=v; break; +        case SUM: oldv+=v; break; +        } +      } +    } +    return; +  eof: +    if (!s.eof()) error("reading key failed (before EOF)"); +  } + +  friend inline std::ostream & operator<<(std::ostream &o,Self const& s) { +    s.print(o); +    return o; +  } + +  friend inline std::istream & operator>>(std::istream &o,Self & s) { +    s.read(o); +    return o; +  }    void init_vector(std::vector<T> *vp) const {      init_vector(*vp); @@ -118,6 +207,10 @@ public:      return values_[index];    } +  inline void maybe_set_value(int index, const T &value) { +    if (value) values_[index] = value; +  } +    inline void set_value(int index, const T &value) {      values_[index] = value;    } @@ -352,6 +445,10 @@ public:      return size()==other.size() && contains_keys_of(other) && other.contains_i(*this);    } +  std::size_t hash_impl() const { +    return boost::hash_range(begin(),end()); +  } +    bool contains(Self const &o) const {      return size()>o.size() && contains(o);    } @@ -371,7 +468,7 @@ public:    bool contains_keys_of(Self const& o) const {      for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i) -      if (values_.find(i)==values_.end()) +      if (values_.find(i->first)==values_.end())          return false;      return true;    } @@ -478,31 +575,36 @@ private:    List p;  }; -template <typename T> +template <class T> +std::size_t hash_value(SparseVector<T> const& x) { +  return x.hash_impl(); +} + +template <class T>  SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {    SparseVector<T> result = a;    return result += b;  } -template <typename T> +template <class T>  SparseVector<T> operator*(const SparseVector<T>& a, const double& b) {    SparseVector<T> result = a;    return result *= b;  } -template <typename T> +template <class T>  SparseVector<T> operator*(const SparseVector<T>& a, const T& b) {    SparseVector<T> result = a;    return result *= b;  } -template <typename T> +template <class T>  SparseVector<T> operator*(const double& a, const SparseVector<T>& b) {    SparseVector<T> result = b;    return result *= a;  } -template <typename T> +template <class T>  std::ostream &operator<<(std::ostream &out, const SparseVector<T> &vec)  {      return vec.operator<<(out); diff --git a/utils/string_to.h b/utils/string_to.h new file mode 100755 index 00000000..c78a5394 --- /dev/null +++ b/utils/string_to.h @@ -0,0 +1,314 @@ +#ifndef STRING_TO_H +#define STRING_TO_H + +/* +   may not be any faster than boost::lexical_cast in later incarnations (see http://accu.org/index.php/journals/1375) +   but is slightly simpler.  no wide char or locale. + +   X string_to<X>(string); +   string to_string(X); +   X& string_into(string,X &); // note: returns the same ref you passed in, for convenience of use + +   default implementation via stringstreams (quite slow, I'm sure) + +   fast implementation for string, int<->string, unsigned<->string, float<->string, double<->string + +*/ + +#ifndef USE_FTOA +#define USE_FTOA 1 +#endif +#ifndef HAVE_STRTOUL +# define HAVE_STRTOUL 1 +#endif + +#include <string> +#include <sstream> +#include <stdexcept> +#include <cstdlib> + +#include "have_64_bits.h" +#include "utoa.h" +#if USE_FTOA +# include "ftoa.h" +#endif + +namespace { +// for faster numeric to/from string.  TODO: separate into optional header +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> // access to evil (fast) C isspace etc. +#include <limits.h> //strtoul +} + +inline void throw_string_to(std::string const& msg,char const* prefix="string_to: ") { +  throw std::runtime_error(prefix+msg); +} + +template <class I,class To> +bool try_stream_into(I & i,To &to,bool complete=true) +{ +    i >> to; +    if (i.fail()) return false; +    if (complete) { +        char c; +        return !(i >> c); +    } +    return true; +} + +template <class Str,class To> +bool try_string_into(Str const& str,To &to,bool complete=true) +{ +    std::istringstream i(str); +    return try_stream_into(i,to,complete); +} + +template <class Str,class Data> inline +Data & string_into(const Str &str,Data &data) +{ +    if (!try_string_into(str,data)) +        throw std::runtime_error(std::string("Couldn't convert (string_into): ")+str); +    return data; +} + + +template <class Data,class Str> inline +Data string_to(const Str &str) +{ +    Data ret; +    string_into(str,ret); +    return ret; +} + +template <class D> inline +std::string to_string(D const &d) +{ +    std::ostringstream o; +    o << d; +    return o.str(); +} + +inline std::string to_string(unsigned x) { +  return utos(x); +} + +inline std::string to_string(int x) { +  return itos(x); +} + +inline long strtol_complete(char const* s,int base=10) { +  char *e; +  if (*s) { +    long r=strtol(s,&e,base); +    char c=*e; +    if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos.  TODO: loop consuming spaces until end? +      return r; +  } +  throw_string_to(s,"Couldn't convert to integer: "); +} + +// returns -INT_MAX or INT_MAX if number is too large/small +inline int strtoi_complete_bounded(char const* s,int base=10) { +  long l=strtol_complete(s,base); +  if (l<std::numeric_limits<int>::min()) +    return std::numeric_limits<int>::min(); +  if (l>std::numeric_limits<int>::max()) +    return std::numeric_limits<int>::max(); +  return l; +} +#define RANGE_STR(x) #x +#ifdef INT_MIN +# define INTRANGE_STR "[" RANGE_STR(INT_MIN) "," RANGE_STR(INT_MAX) "]" +#else +# define INTRANGE_STR "[-2137483648,2147483647]" +#endif + +  // throw if out of int range +inline int strtoi_complete_exact(char const* s,int base=10) { +  long l=strtol_complete(s,base); +  if (l<std::numeric_limits<int>::min() || l>std::numeric_limits<int>::max()) +    throw_string_to(s,"Out of range for int " INTRANGE_STR ": "); +  return l; +} + +#if HAVE_LONGER_LONG +inline int& string_into(std::string const& s,int &x) { +  x=strtoi_complete_exact(s.c_str()); +  return x; +} +inline int& string_into(char const* s,int &x) { +  x=strtoi_complete_exact(s); +  return x; +} +#endif + +inline long& string_into(std::string const& s,long &x) { +  x=strtol_complete(s.c_str()); +  return x; +} +inline long& string_into(char const* s,long &x) { +  x=strtol_complete(s); +  return x; +} + + +//FIXME: preprocessor separation for tokens int<->unsigned int, long<->unsigned long, strtol<->strtoul ?  massive code duplication +inline unsigned long strtoul_complete(char const* s,int base=10) { +  char *e; +  if (*s) { +#if HAVE_STRTOUL +    unsigned long r=strtoul(s,&e,base); +#else +//    unsigned long r=strtol(s,&e,base); //FIXME: not usually safe +    unsigned long r; +    sscanf(s,"%ul",&r); +#endif +    char c=*e; +    if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos.  TODO: loop consuming spaces until end? +      return r; +  } +  throw_string_to(s,"Couldn't convert to integer: "); +} + +inline unsigned strtou_complete_bounded(char const* s,int base=10) { +  unsigned long l=strtoul_complete(s,base); +  if (l<std::numeric_limits<unsigned>::min()) +    return std::numeric_limits<unsigned>::min(); +  if (l>std::numeric_limits<unsigned>::max()) +    return std::numeric_limits<unsigned>::max(); +  return l; +} + +#ifdef UINT_MIN +# define UINTRANGE_STR "[" RANGE_STR(UINT_MIN) "," RANGE_STR(UINT_MAX) "]" +#else +# define UINTRANGE_STR "[0,4,294,967,295]" +#endif + +  // throw if out of int range +inline unsigned strtou_complete_exact(char const* s,int base=10) { +  unsigned long l=strtoul_complete(s,base); +  if (l<std::numeric_limits<unsigned>::min() || l>std::numeric_limits<unsigned>::max()) +    throw_string_to(s,"Out of range for uint " UINTRANGE_STR ": "); +  return l; +} + +#if HAVE_LONGER_LONG +inline unsigned& string_into(std::string const& s,unsigned &x) { +  x=strtou_complete_exact(s.c_str()); +  return x; +} +inline unsigned& string_into(char const* s,unsigned &x) { +  x=strtou_complete_exact(s); +  return x; +} +#endif + +inline unsigned long& string_into(std::string const& s,unsigned long &x) { +  x=strtoul_complete(s.c_str()); +  return x; +} +inline unsigned long& string_into(char const* s,unsigned long &x) { +  x=strtoul_complete(s); +  return x; +} + +//FIXME: end code duplication + + +/* 9 decimal places needed to avoid rounding error in float->string->float.  17 for double->string->double +   in terms of usable decimal places, there are 6 for float and 15 for double + */ +inline std::string to_string_roundtrip(float x) { +  char buf[17]; +  return std::string(buf,buf+sprintf(buf,"%.9g",x)); +} +inline std::string to_string(float x) { +#if USE_FTOA +  return ftos(x); +#else +  char buf[15]; +  return std::string(buf,buf+sprintf(buf,"%.7g",x)); +#endif +} +inline std::string to_string_roundtrip(double x) { +  char buf[32]; +  return std::string(buf,buf+sprintf(buf,"%.17g",x)); +} +inline std::string to_string(double x) { +#if USE_FTOA +  return ftos(x); +#else +  char buf[30]; +  return std::string(buf,buf+sprintf(buf,"%.15g",x)); +#endif +} + +inline double& string_into(char const* s,double &x) { +  x=std::atof(s); +  return x; +} +inline float& string_into(char const* s,float &x) { +  x=std::atof(s); +  return x; +} + +inline double& string_into(std::string const& s,double &x) { +  x=std::atof(s.c_str()); +  return x; +} +inline float& string_into(std::string const& s,float &x) { +  x=std::atof(s.c_str()); +  return x; +} + + +template <class Str> +bool try_string_into(Str const& str,Str &to,bool complete=true) +{ +    str=to; +    return true; +} + +inline std::string const& to_string(std::string const& d) +{ +    return d; +} + +template <class Str> +Str const& string_to(Str const &s) +{ +    return s; +} + +template <class Str> +Str & string_into(Str const &s,Str &d) +{ +    return d=s; +} + +/* + +template <class Str,class Data,class size_type> inline +void substring_into(const Str &str,size_type pos,size_type n,Data &data) +{ +//    std::istringstream i(str,pos,n); // doesn't exist! +    std::istringstream i(str.substr(pos,n)); +    if (!(i>>*data)) +        throw std::runtime_error("Couldn't convert (string_into): "+str); +} + +template <class Data,class Str,class size_type> inline +Data string_to(const Str &str,size_type pos,size_type n) +{ +    Data ret; +    substring_into(str,pos,n,ret); +    return ret; +} + +*/ + + + +#endif diff --git a/utils/warning_compiler.h b/utils/warning_compiler.h new file mode 100644 index 00000000..2052cff3 --- /dev/null +++ b/utils/warning_compiler.h @@ -0,0 +1,15 @@ +#ifndef WARNING_COMPILER_HPP +#define WARNING_COMPILER_HPP + +#ifndef HAVE_GCC_4_4 +#undef HAVE_DIAGNOSTIC_PUSH +#if __GNUC__ > 4 || __GNUC__==4 && __GNUC_MINOR__ > 3 +# define HAVE_GCC_4_4 1 +# define HAVE_DIAGNOSTIC_PUSH 1 +#else +# define HAVE_GCC_4_4 0 +# define HAVE_DIAGNOSTIC_PUSH 0 +#endif +#endif + +#endif diff --git a/utils/warning_pop.h b/utils/warning_pop.h new file mode 100644 index 00000000..2be5d9ed --- /dev/null +++ b/utils/warning_pop.h @@ -0,0 +1,8 @@ +#ifdef _MSC_VER +#pragma warning( pop ) +#else +#include "warning_compiler.h" +#if HAVE_DIAGNOSTIC_PUSH +# pragma GCC diagnostic pop +#endif +#endif diff --git a/utils/warning_push.h b/utils/warning_push.h new file mode 100644 index 00000000..086fd524 --- /dev/null +++ b/utils/warning_push.h @@ -0,0 +1,8 @@ +#ifdef _MSC_VER +#pragma warning( pop ) +#else +#include "warning_compiler.h" +#if HAVE_DIAGNOSTIC_PUSH +# pragma GCC diagnostic push +#endif +#endif | 
