diff options
Diffstat (limited to 'utils')
-rwxr-xr-x | utils/indices_after.h | 10 | ||||
-rw-r--r-- | utils/logval.h | 36 | ||||
-rw-r--r-- | utils/sparse_vector.h | 136 | ||||
-rwxr-xr-x | utils/string_to.h | 314 | ||||
-rw-r--r-- | utils/warning_compiler.h | 15 | ||||
-rw-r--r-- | utils/warning_pop.h | 8 | ||||
-rw-r--r-- | utils/warning_push.h | 8 |
7 files changed, 493 insertions, 34 deletions
diff --git a/utils/indices_after.h b/utils/indices_after.h index 2891563c..62683f39 100755 --- a/utils/indices_after.h +++ b/utils/indices_after.h @@ -47,8 +47,8 @@ unsigned new_indices_keep_if_n(unsigned n,It i,KeepIf const& r,O out) } template <class KEEP,class O> -unsigned new_indices(KEEP keep,O out) { - return new_indices(keep.begin(),keep.end(),out); +unsigned new_indices_keep(KEEP keep,O out) { + return new_indices_keep(keep.begin(),keep.end(),out); } template <class V,class Out,class Permi> @@ -129,8 +129,10 @@ struct indices_after if (n_mapped>0) { map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped); n_kept=new_indices_keep(i,end,map); - } else + } else { + n_kept=0; map=NULL; + } } template <class A> void init(A const& a) @@ -139,7 +141,7 @@ struct indices_after } template <class A> - indices_after(A const& a) + explicit indices_after(A const& a) { init(a.begin(),a.end()); } diff --git a/utils/logval.h b/utils/logval.h index 37f14ae5..b337cf0e 100644 --- a/utils/logval.h +++ b/utils/logval.h @@ -3,12 +3,13 @@ #define LOGVAL_CHECK_NEG false +#include <boost/functional/hash.hpp> #include <iostream> #include <cstdlib> #include <cmath> #include <limits> -template <typename T> +template <class T> class LogVal { public: LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {} @@ -23,6 +24,11 @@ class LogVal { static LogVal<T> e() { return LogVal(1,false); } void logeq(const T& v) { s_ = false; v_ = v; } + std::size_t hash_impl() const { + using namespace boost; + return hash_value(v_)+s_; + } + LogVal& operator+=(const LogVal& a) { if (a.v_ == -std::numeric_limits<T>::infinity()) return *this; if (a.s_ == s_) { @@ -98,31 +104,31 @@ class LogVal { }; // copy elision - as opposed to explicit copy of LogVal<T> const& o1, we should be able to construct Logval r=a+(b+c) as a single result in place in r. todo: return std::move(o1) - C++0x -template<typename T> +template<class T> LogVal<T> operator+(LogVal<T> o1, const LogVal<T>& o2) { o1 += o2; return o1; } -template<typename T> +template<class T> LogVal<T> operator*(LogVal<T> o1, const LogVal<T>& o2) { o1 *= o2; return o1; } -template<typename T> +template<class T> LogVal<T> operator/(LogVal<T> o1, const LogVal<T>& o2) { o1 /= o2; return o1; } -template<typename T> +template<class T> LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) { o1 -= o2; return o1; } -template<typename T> +template<class T> T log(const LogVal<T>& o) { #ifdef LOGVAL_CHECK_NEG if (o.s_) return log(-1.0); @@ -130,12 +136,12 @@ T log(const LogVal<T>& o) { return o.v_; } -template <typename T> +template <class T> LogVal<T> pow(const LogVal<T>& b, const T& e) { return b.pow(e); } -template <typename T> +template <class T> bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) { if (lhs.s_ == rhs.s_) { return (lhs.v_ < rhs.v_); @@ -145,28 +151,32 @@ bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) { } #if 0 -template <typename T> +template <class T> bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) { return (lhs.v_ <= rhs.v_); } -template <typename T> +template <class T> bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) { return (lhs.v_ > rhs.v_); } -template <typename T> +template <class T> bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) { return (lhs.v_ >= rhs.v_); } #endif -template <typename T> + +template <class T> +std::size_t hash_value(const LogVal<T>& x) { return x.hash_impl(); } + +template <class T> bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) { return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_); } -template <typename T> +template <class T> bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) { return !(lhs == rhs); } diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h index e8e9c2f7..7ac85d1d 100644 --- a/utils/sparse_vector.h +++ b/utils/sparse_vector.h @@ -1,16 +1,13 @@ #ifndef _SPARSE_VECTOR_H_ #define _SPARSE_VECTOR_H_ +/* +TODO: specialize for int value types, where it probably makes sense to check if adding/subtracting brings a value to 0, and remove it from the map (e.g. in a gibbs sampler). or add a separate policy argument for that. + */ + //#define SPARSE_VECTOR_HASH +// if defined, use hash_map rather than map. map is probably faster/smaller for small vectors -#ifdef SPARSE_VECTOR_HASH -#include "hash.h" -# define SPARSE_VECTOR_MAP HASH_MAP -# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted) -#else -# define SPARSE_VECTOR_MAP std::map -# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) -#endif /* use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list) */ @@ -38,6 +35,17 @@ // this is a modified version of code originally written // by Phil Blunsom +#include <boost/functional/hash.hpp> +#include <stdexcept> +#ifdef SPARSE_VECTOR_HASH +#include "hash.h" +# define SPARSE_VECTOR_MAP HASH_MAP +# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted) +#else +# define SPARSE_VECTOR_MAP std::map +# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) +#endif + #include <iostream> #include <map> #include <tr1/unordered_map> @@ -46,6 +54,7 @@ #include "fdict.h" #include "small_vector.h" +#include "string_to.h" template <class T> inline T & extend_vector(std::vector<T> &v,int i) { @@ -54,7 +63,7 @@ inline T & extend_vector(std::vector<T> &v,int i) { return v[i]; } -template <typename T> +template <class T> class SparseVector { void init_reserved() { SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2); @@ -71,17 +80,97 @@ public: SparseVector() { init_reserved(); } + typedef typename MapType::value_type value_type; + typedef typename MapType::iterator iterator; explicit SparseVector(std::vector<T> const& v) { init_reserved(); - typename MapType::iterator p=values_.begin(); + iterator p=values_.begin(); const T z=0; for (unsigned i=0;i<v.size();++i) { T const& t=v[i]; if (t!=z) - p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster + p=values_.insert(p,value_type(i,t)); //hint makes insertion faster + } + } + + typedef char const* Str; + template <class O> + void print(O &o,Str pre="",Str post="",Str kvsep="=",Str pairsep=" ") const { + o << pre; + bool first=true; + for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) { + if (first) + first=false; + else + o<<pairsep; + o<<FD::Convert(i->first)<<kvsep<<i->second; } + o << post; } + static void error(std::string const& msg) { + throw std::runtime_error("SparseVector: "+msg); + } + + enum DupPolicy { + NO_DUPS, + KEEP_FIRST, + KEEP_LAST, + SUM + }; + + // either key val alternating whitespace sep, or key=val (kvsep char is '='). end at eof or terminator (non-ws) char + template <class S> + void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool stop_at_terminator=false,char terminator=')') { + values_.clear(); + std::string id; + WordID k; + T v; +#undef SPARSE_MUST_READ +#define SPARSE_MUST_READ(x) if (!(x)) error(#x); + int ki; + while (s) { + if (stop_at_terminator) { + char c; + if (!(s>>c)) goto eof; + s.unget(); + if (c==terminator) return; + } + if (!(s>>id)) goto eof; + if (use_kvsep && (ki=id.find(kvsep))!=std::string::npos) { + k=FD::Convert(std::string(id,0,ki)); + string_into(id.c_str()+ki+1,v); + } else { + k=FD::Convert(id); + if (!(s>>v)) error("reading value failed"); + } + std::pair<iterator,bool> vi=values_.insert(value_type(k,v)); + if (vi.second) { + T &oldv=vi.first->second; + switch(dp) { + case NO_DUPS: error("read duplicate key with NO_DUPS. key=" + +FD::Convert(k)+" val="+to_string(v)+" old-val="+to_string(oldv)); + break; + case KEEP_FIRST: break; + case KEEP_LAST: oldv=v; break; + case SUM: oldv+=v; break; + } + } + } + return; + eof: + if (!s.eof()) error("reading key failed (before EOF)"); + } + + friend inline std::ostream & operator<<(std::ostream &o,Self const& s) { + s.print(o); + return o; + } + + friend inline std::istream & operator>>(std::istream &o,Self & s) { + s.read(o); + return o; + } void init_vector(std::vector<T> *vp) const { init_vector(*vp); @@ -118,6 +207,10 @@ public: return values_[index]; } + inline void maybe_set_value(int index, const T &value) { + if (value) values_[index] = value; + } + inline void set_value(int index, const T &value) { values_[index] = value; } @@ -352,6 +445,10 @@ public: return size()==other.size() && contains_keys_of(other) && other.contains_i(*this); } + std::size_t hash_impl() const { + return boost::hash_range(begin(),end()); + } + bool contains(Self const &o) const { return size()>o.size() && contains(o); } @@ -371,7 +468,7 @@ public: bool contains_keys_of(Self const& o) const { for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i) - if (values_.find(i)==values_.end()) + if (values_.find(i->first)==values_.end()) return false; return true; } @@ -478,31 +575,36 @@ private: List p; }; -template <typename T> +template <class T> +std::size_t hash_value(SparseVector<T> const& x) { + return x.hash_impl(); +} + +template <class T> SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) { SparseVector<T> result = a; return result += b; } -template <typename T> +template <class T> SparseVector<T> operator*(const SparseVector<T>& a, const double& b) { SparseVector<T> result = a; return result *= b; } -template <typename T> +template <class T> SparseVector<T> operator*(const SparseVector<T>& a, const T& b) { SparseVector<T> result = a; return result *= b; } -template <typename T> +template <class T> SparseVector<T> operator*(const double& a, const SparseVector<T>& b) { SparseVector<T> result = b; return result *= a; } -template <typename T> +template <class T> std::ostream &operator<<(std::ostream &out, const SparseVector<T> &vec) { return vec.operator<<(out); diff --git a/utils/string_to.h b/utils/string_to.h new file mode 100755 index 00000000..c78a5394 --- /dev/null +++ b/utils/string_to.h @@ -0,0 +1,314 @@ +#ifndef STRING_TO_H +#define STRING_TO_H + +/* + may not be any faster than boost::lexical_cast in later incarnations (see http://accu.org/index.php/journals/1375) + but is slightly simpler. no wide char or locale. + + X string_to<X>(string); + string to_string(X); + X& string_into(string,X &); // note: returns the same ref you passed in, for convenience of use + + default implementation via stringstreams (quite slow, I'm sure) + + fast implementation for string, int<->string, unsigned<->string, float<->string, double<->string + +*/ + +#ifndef USE_FTOA +#define USE_FTOA 1 +#endif +#ifndef HAVE_STRTOUL +# define HAVE_STRTOUL 1 +#endif + +#include <string> +#include <sstream> +#include <stdexcept> +#include <cstdlib> + +#include "have_64_bits.h" +#include "utoa.h" +#if USE_FTOA +# include "ftoa.h" +#endif + +namespace { +// for faster numeric to/from string. TODO: separate into optional header +#include <stdio.h> +#include <ctype.h> +#include <stdlib.h> // access to evil (fast) C isspace etc. +#include <limits.h> //strtoul +} + +inline void throw_string_to(std::string const& msg,char const* prefix="string_to: ") { + throw std::runtime_error(prefix+msg); +} + +template <class I,class To> +bool try_stream_into(I & i,To &to,bool complete=true) +{ + i >> to; + if (i.fail()) return false; + if (complete) { + char c; + return !(i >> c); + } + return true; +} + +template <class Str,class To> +bool try_string_into(Str const& str,To &to,bool complete=true) +{ + std::istringstream i(str); + return try_stream_into(i,to,complete); +} + +template <class Str,class Data> inline +Data & string_into(const Str &str,Data &data) +{ + if (!try_string_into(str,data)) + throw std::runtime_error(std::string("Couldn't convert (string_into): ")+str); + return data; +} + + +template <class Data,class Str> inline +Data string_to(const Str &str) +{ + Data ret; + string_into(str,ret); + return ret; +} + +template <class D> inline +std::string to_string(D const &d) +{ + std::ostringstream o; + o << d; + return o.str(); +} + +inline std::string to_string(unsigned x) { + return utos(x); +} + +inline std::string to_string(int x) { + return itos(x); +} + +inline long strtol_complete(char const* s,int base=10) { + char *e; + if (*s) { + long r=strtol(s,&e,base); + char c=*e; + if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos. TODO: loop consuming spaces until end? + return r; + } + throw_string_to(s,"Couldn't convert to integer: "); +} + +// returns -INT_MAX or INT_MAX if number is too large/small +inline int strtoi_complete_bounded(char const* s,int base=10) { + long l=strtol_complete(s,base); + if (l<std::numeric_limits<int>::min()) + return std::numeric_limits<int>::min(); + if (l>std::numeric_limits<int>::max()) + return std::numeric_limits<int>::max(); + return l; +} +#define RANGE_STR(x) #x +#ifdef INT_MIN +# define INTRANGE_STR "[" RANGE_STR(INT_MIN) "," RANGE_STR(INT_MAX) "]" +#else +# define INTRANGE_STR "[-2137483648,2147483647]" +#endif + + // throw if out of int range +inline int strtoi_complete_exact(char const* s,int base=10) { + long l=strtol_complete(s,base); + if (l<std::numeric_limits<int>::min() || l>std::numeric_limits<int>::max()) + throw_string_to(s,"Out of range for int " INTRANGE_STR ": "); + return l; +} + +#if HAVE_LONGER_LONG +inline int& string_into(std::string const& s,int &x) { + x=strtoi_complete_exact(s.c_str()); + return x; +} +inline int& string_into(char const* s,int &x) { + x=strtoi_complete_exact(s); + return x; +} +#endif + +inline long& string_into(std::string const& s,long &x) { + x=strtol_complete(s.c_str()); + return x; +} +inline long& string_into(char const* s,long &x) { + x=strtol_complete(s); + return x; +} + + +//FIXME: preprocessor separation for tokens int<->unsigned int, long<->unsigned long, strtol<->strtoul ? massive code duplication +inline unsigned long strtoul_complete(char const* s,int base=10) { + char *e; + if (*s) { +#if HAVE_STRTOUL + unsigned long r=strtoul(s,&e,base); +#else +// unsigned long r=strtol(s,&e,base); //FIXME: not usually safe + unsigned long r; + sscanf(s,"%ul",&r); +#endif + char c=*e; + if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos. TODO: loop consuming spaces until end? + return r; + } + throw_string_to(s,"Couldn't convert to integer: "); +} + +inline unsigned strtou_complete_bounded(char const* s,int base=10) { + unsigned long l=strtoul_complete(s,base); + if (l<std::numeric_limits<unsigned>::min()) + return std::numeric_limits<unsigned>::min(); + if (l>std::numeric_limits<unsigned>::max()) + return std::numeric_limits<unsigned>::max(); + return l; +} + +#ifdef UINT_MIN +# define UINTRANGE_STR "[" RANGE_STR(UINT_MIN) "," RANGE_STR(UINT_MAX) "]" +#else +# define UINTRANGE_STR "[0,4,294,967,295]" +#endif + + // throw if out of int range +inline unsigned strtou_complete_exact(char const* s,int base=10) { + unsigned long l=strtoul_complete(s,base); + if (l<std::numeric_limits<unsigned>::min() || l>std::numeric_limits<unsigned>::max()) + throw_string_to(s,"Out of range for uint " UINTRANGE_STR ": "); + return l; +} + +#if HAVE_LONGER_LONG +inline unsigned& string_into(std::string const& s,unsigned &x) { + x=strtou_complete_exact(s.c_str()); + return x; +} +inline unsigned& string_into(char const* s,unsigned &x) { + x=strtou_complete_exact(s); + return x; +} +#endif + +inline unsigned long& string_into(std::string const& s,unsigned long &x) { + x=strtoul_complete(s.c_str()); + return x; +} +inline unsigned long& string_into(char const* s,unsigned long &x) { + x=strtoul_complete(s); + return x; +} + +//FIXME: end code duplication + + +/* 9 decimal places needed to avoid rounding error in float->string->float. 17 for double->string->double + in terms of usable decimal places, there are 6 for float and 15 for double + */ +inline std::string to_string_roundtrip(float x) { + char buf[17]; + return std::string(buf,buf+sprintf(buf,"%.9g",x)); +} +inline std::string to_string(float x) { +#if USE_FTOA + return ftos(x); +#else + char buf[15]; + return std::string(buf,buf+sprintf(buf,"%.7g",x)); +#endif +} +inline std::string to_string_roundtrip(double x) { + char buf[32]; + return std::string(buf,buf+sprintf(buf,"%.17g",x)); +} +inline std::string to_string(double x) { +#if USE_FTOA + return ftos(x); +#else + char buf[30]; + return std::string(buf,buf+sprintf(buf,"%.15g",x)); +#endif +} + +inline double& string_into(char const* s,double &x) { + x=std::atof(s); + return x; +} +inline float& string_into(char const* s,float &x) { + x=std::atof(s); + return x; +} + +inline double& string_into(std::string const& s,double &x) { + x=std::atof(s.c_str()); + return x; +} +inline float& string_into(std::string const& s,float &x) { + x=std::atof(s.c_str()); + return x; +} + + +template <class Str> +bool try_string_into(Str const& str,Str &to,bool complete=true) +{ + str=to; + return true; +} + +inline std::string const& to_string(std::string const& d) +{ + return d; +} + +template <class Str> +Str const& string_to(Str const &s) +{ + return s; +} + +template <class Str> +Str & string_into(Str const &s,Str &d) +{ + return d=s; +} + +/* + +template <class Str,class Data,class size_type> inline +void substring_into(const Str &str,size_type pos,size_type n,Data &data) +{ +// std::istringstream i(str,pos,n); // doesn't exist! + std::istringstream i(str.substr(pos,n)); + if (!(i>>*data)) + throw std::runtime_error("Couldn't convert (string_into): "+str); +} + +template <class Data,class Str,class size_type> inline +Data string_to(const Str &str,size_type pos,size_type n) +{ + Data ret; + substring_into(str,pos,n,ret); + return ret; +} + +*/ + + + +#endif diff --git a/utils/warning_compiler.h b/utils/warning_compiler.h new file mode 100644 index 00000000..2052cff3 --- /dev/null +++ b/utils/warning_compiler.h @@ -0,0 +1,15 @@ +#ifndef WARNING_COMPILER_HPP +#define WARNING_COMPILER_HPP + +#ifndef HAVE_GCC_4_4 +#undef HAVE_DIAGNOSTIC_PUSH +#if __GNUC__ > 4 || __GNUC__==4 && __GNUC_MINOR__ > 3 +# define HAVE_GCC_4_4 1 +# define HAVE_DIAGNOSTIC_PUSH 1 +#else +# define HAVE_GCC_4_4 0 +# define HAVE_DIAGNOSTIC_PUSH 0 +#endif +#endif + +#endif diff --git a/utils/warning_pop.h b/utils/warning_pop.h new file mode 100644 index 00000000..2be5d9ed --- /dev/null +++ b/utils/warning_pop.h @@ -0,0 +1,8 @@ +#ifdef _MSC_VER +#pragma warning( pop ) +#else +#include "warning_compiler.h" +#if HAVE_DIAGNOSTIC_PUSH +# pragma GCC diagnostic pop +#endif +#endif diff --git a/utils/warning_push.h b/utils/warning_push.h new file mode 100644 index 00000000..086fd524 --- /dev/null +++ b/utils/warning_push.h @@ -0,0 +1,8 @@ +#ifdef _MSC_VER +#pragma warning( pop ) +#else +#include "warning_compiler.h" +#if HAVE_DIAGNOSTIC_PUSH +# pragma GCC diagnostic push +#endif +#endif |