summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
Diffstat (limited to 'utils')
-rwxr-xr-xutils/indices_after.h10
-rw-r--r--utils/logval.h36
-rw-r--r--utils/sparse_vector.h136
-rwxr-xr-xutils/string_to.h314
-rw-r--r--utils/warning_compiler.h15
-rw-r--r--utils/warning_pop.h8
-rw-r--r--utils/warning_push.h8
7 files changed, 493 insertions, 34 deletions
diff --git a/utils/indices_after.h b/utils/indices_after.h
index 2891563c..62683f39 100755
--- a/utils/indices_after.h
+++ b/utils/indices_after.h
@@ -47,8 +47,8 @@ unsigned new_indices_keep_if_n(unsigned n,It i,KeepIf const& r,O out)
}
template <class KEEP,class O>
-unsigned new_indices(KEEP keep,O out) {
- return new_indices(keep.begin(),keep.end(),out);
+unsigned new_indices_keep(KEEP keep,O out) {
+ return new_indices_keep(keep.begin(),keep.end(),out);
}
template <class V,class Out,class Permi>
@@ -129,8 +129,10 @@ struct indices_after
if (n_mapped>0) {
map=(unsigned *)::operator new(sizeof(unsigned)*n_mapped);
n_kept=new_indices_keep(i,end,map);
- } else
+ } else {
+ n_kept=0;
map=NULL;
+ }
}
template <class A>
void init(A const& a)
@@ -139,7 +141,7 @@ struct indices_after
}
template <class A>
- indices_after(A const& a)
+ explicit indices_after(A const& a)
{
init(a.begin(),a.end());
}
diff --git a/utils/logval.h b/utils/logval.h
index 37f14ae5..b337cf0e 100644
--- a/utils/logval.h
+++ b/utils/logval.h
@@ -3,12 +3,13 @@
#define LOGVAL_CHECK_NEG false
+#include <boost/functional/hash.hpp>
#include <iostream>
#include <cstdlib>
#include <cmath>
#include <limits>
-template <typename T>
+template <class T>
class LogVal {
public:
LogVal() : s_(), v_(-std::numeric_limits<T>::infinity()) {}
@@ -23,6 +24,11 @@ class LogVal {
static LogVal<T> e() { return LogVal(1,false); }
void logeq(const T& v) { s_ = false; v_ = v; }
+ std::size_t hash_impl() const {
+ using namespace boost;
+ return hash_value(v_)+s_;
+ }
+
LogVal& operator+=(const LogVal& a) {
if (a.v_ == -std::numeric_limits<T>::infinity()) return *this;
if (a.s_ == s_) {
@@ -98,31 +104,31 @@ class LogVal {
};
// copy elision - as opposed to explicit copy of LogVal<T> const& o1, we should be able to construct Logval r=a+(b+c) as a single result in place in r. todo: return std::move(o1) - C++0x
-template<typename T>
+template<class T>
LogVal<T> operator+(LogVal<T> o1, const LogVal<T>& o2) {
o1 += o2;
return o1;
}
-template<typename T>
+template<class T>
LogVal<T> operator*(LogVal<T> o1, const LogVal<T>& o2) {
o1 *= o2;
return o1;
}
-template<typename T>
+template<class T>
LogVal<T> operator/(LogVal<T> o1, const LogVal<T>& o2) {
o1 /= o2;
return o1;
}
-template<typename T>
+template<class T>
LogVal<T> operator-(LogVal<T> o1, const LogVal<T>& o2) {
o1 -= o2;
return o1;
}
-template<typename T>
+template<class T>
T log(const LogVal<T>& o) {
#ifdef LOGVAL_CHECK_NEG
if (o.s_) return log(-1.0);
@@ -130,12 +136,12 @@ T log(const LogVal<T>& o) {
return o.v_;
}
-template <typename T>
+template <class T>
LogVal<T> pow(const LogVal<T>& b, const T& e) {
return b.pow(e);
}
-template <typename T>
+template <class T>
bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {
if (lhs.s_ == rhs.s_) {
return (lhs.v_ < rhs.v_);
@@ -145,28 +151,32 @@ bool operator<(const LogVal<T>& lhs, const LogVal<T>& rhs) {
}
#if 0
-template <typename T>
+template <class T>
bool operator<=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
return (lhs.v_ <= rhs.v_);
}
-template <typename T>
+template <class T>
bool operator>(const LogVal<T>& lhs, const LogVal<T>& rhs) {
return (lhs.v_ > rhs.v_);
}
-template <typename T>
+template <class T>
bool operator>=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
return (lhs.v_ >= rhs.v_);
}
#endif
-template <typename T>
+
+template <class T>
+std::size_t hash_value(const LogVal<T>& x) { return x.hash_impl(); }
+
+template <class T>
bool operator==(const LogVal<T>& lhs, const LogVal<T>& rhs) {
return (lhs.v_ == rhs.v_) && (lhs.s_ == rhs.s_);
}
-template <typename T>
+template <class T>
bool operator!=(const LogVal<T>& lhs, const LogVal<T>& rhs) {
return !(lhs == rhs);
}
diff --git a/utils/sparse_vector.h b/utils/sparse_vector.h
index e8e9c2f7..7ac85d1d 100644
--- a/utils/sparse_vector.h
+++ b/utils/sparse_vector.h
@@ -1,16 +1,13 @@
#ifndef _SPARSE_VECTOR_H_
#define _SPARSE_VECTOR_H_
+/*
+TODO: specialize for int value types, where it probably makes sense to check if adding/subtracting brings a value to 0, and remove it from the map (e.g. in a gibbs sampler). or add a separate policy argument for that.
+ */
+
//#define SPARSE_VECTOR_HASH
+// if defined, use hash_map rather than map. map is probably faster/smaller for small vectors
-#ifdef SPARSE_VECTOR_HASH
-#include "hash.h"
-# define SPARSE_VECTOR_MAP HASH_MAP
-# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted)
-#else
-# define SPARSE_VECTOR_MAP std::map
-# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted)
-#endif
/*
use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list)
*/
@@ -38,6 +35,17 @@
// this is a modified version of code originally written
// by Phil Blunsom
+#include <boost/functional/hash.hpp>
+#include <stdexcept>
+#ifdef SPARSE_VECTOR_HASH
+#include "hash.h"
+# define SPARSE_VECTOR_MAP HASH_MAP
+# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted)
+#else
+# define SPARSE_VECTOR_MAP std::map
+# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted)
+#endif
+
#include <iostream>
#include <map>
#include <tr1/unordered_map>
@@ -46,6 +54,7 @@
#include "fdict.h"
#include "small_vector.h"
+#include "string_to.h"
template <class T>
inline T & extend_vector(std::vector<T> &v,int i) {
@@ -54,7 +63,7 @@ inline T & extend_vector(std::vector<T> &v,int i) {
return v[i];
}
-template <typename T>
+template <class T>
class SparseVector {
void init_reserved() {
SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2);
@@ -71,17 +80,97 @@ public:
SparseVector() {
init_reserved();
}
+ typedef typename MapType::value_type value_type;
+ typedef typename MapType::iterator iterator;
explicit SparseVector(std::vector<T> const& v) {
init_reserved();
- typename MapType::iterator p=values_.begin();
+ iterator p=values_.begin();
const T z=0;
for (unsigned i=0;i<v.size();++i) {
T const& t=v[i];
if (t!=z)
- p=values_.insert(p,typename MapType::value_type(i,t)); //hint makes insertion faster
+ p=values_.insert(p,value_type(i,t)); //hint makes insertion faster
+ }
+ }
+
+ typedef char const* Str;
+ template <class O>
+ void print(O &o,Str pre="",Str post="",Str kvsep="=",Str pairsep=" ") const {
+ o << pre;
+ bool first=true;
+ for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) {
+ if (first)
+ first=false;
+ else
+ o<<pairsep;
+ o<<FD::Convert(i->first)<<kvsep<<i->second;
}
+ o << post;
}
+ static void error(std::string const& msg) {
+ throw std::runtime_error("SparseVector: "+msg);
+ }
+
+ enum DupPolicy {
+ NO_DUPS,
+ KEEP_FIRST,
+ KEEP_LAST,
+ SUM
+ };
+
+ // either key val alternating whitespace sep, or key=val (kvsep char is '='). end at eof or terminator (non-ws) char
+ template <class S>
+ void read(S &s,DupPolicy dp=NO_DUPS,bool use_kvsep=true,char kvsep='=',bool stop_at_terminator=false,char terminator=')') {
+ values_.clear();
+ std::string id;
+ WordID k;
+ T v;
+#undef SPARSE_MUST_READ
+#define SPARSE_MUST_READ(x) if (!(x)) error(#x);
+ int ki;
+ while (s) {
+ if (stop_at_terminator) {
+ char c;
+ if (!(s>>c)) goto eof;
+ s.unget();
+ if (c==terminator) return;
+ }
+ if (!(s>>id)) goto eof;
+ if (use_kvsep && (ki=id.find(kvsep))!=std::string::npos) {
+ k=FD::Convert(std::string(id,0,ki));
+ string_into(id.c_str()+ki+1,v);
+ } else {
+ k=FD::Convert(id);
+ if (!(s>>v)) error("reading value failed");
+ }
+ std::pair<iterator,bool> vi=values_.insert(value_type(k,v));
+ if (vi.second) {
+ T &oldv=vi.first->second;
+ switch(dp) {
+ case NO_DUPS: error("read duplicate key with NO_DUPS. key="
+ +FD::Convert(k)+" val="+to_string(v)+" old-val="+to_string(oldv));
+ break;
+ case KEEP_FIRST: break;
+ case KEEP_LAST: oldv=v; break;
+ case SUM: oldv+=v; break;
+ }
+ }
+ }
+ return;
+ eof:
+ if (!s.eof()) error("reading key failed (before EOF)");
+ }
+
+ friend inline std::ostream & operator<<(std::ostream &o,Self const& s) {
+ s.print(o);
+ return o;
+ }
+
+ friend inline std::istream & operator>>(std::istream &o,Self & s) {
+ s.read(o);
+ return o;
+ }
void init_vector(std::vector<T> *vp) const {
init_vector(*vp);
@@ -118,6 +207,10 @@ public:
return values_[index];
}
+ inline void maybe_set_value(int index, const T &value) {
+ if (value) values_[index] = value;
+ }
+
inline void set_value(int index, const T &value) {
values_[index] = value;
}
@@ -352,6 +445,10 @@ public:
return size()==other.size() && contains_keys_of(other) && other.contains_i(*this);
}
+ std::size_t hash_impl() const {
+ return boost::hash_range(begin(),end());
+ }
+
bool contains(Self const &o) const {
return size()>o.size() && contains(o);
}
@@ -371,7 +468,7 @@ public:
bool contains_keys_of(Self const& o) const {
for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i)
- if (values_.find(i)==values_.end())
+ if (values_.find(i->first)==values_.end())
return false;
return true;
}
@@ -478,31 +575,36 @@ private:
List p;
};
-template <typename T>
+template <class T>
+std::size_t hash_value(SparseVector<T> const& x) {
+ return x.hash_impl();
+}
+
+template <class T>
SparseVector<T> operator+(const SparseVector<T>& a, const SparseVector<T>& b) {
SparseVector<T> result = a;
return result += b;
}
-template <typename T>
+template <class T>
SparseVector<T> operator*(const SparseVector<T>& a, const double& b) {
SparseVector<T> result = a;
return result *= b;
}
-template <typename T>
+template <class T>
SparseVector<T> operator*(const SparseVector<T>& a, const T& b) {
SparseVector<T> result = a;
return result *= b;
}
-template <typename T>
+template <class T>
SparseVector<T> operator*(const double& a, const SparseVector<T>& b) {
SparseVector<T> result = b;
return result *= a;
}
-template <typename T>
+template <class T>
std::ostream &operator<<(std::ostream &out, const SparseVector<T> &vec)
{
return vec.operator<<(out);
diff --git a/utils/string_to.h b/utils/string_to.h
new file mode 100755
index 00000000..c78a5394
--- /dev/null
+++ b/utils/string_to.h
@@ -0,0 +1,314 @@
+#ifndef STRING_TO_H
+#define STRING_TO_H
+
+/*
+ may not be any faster than boost::lexical_cast in later incarnations (see http://accu.org/index.php/journals/1375)
+ but is slightly simpler. no wide char or locale.
+
+ X string_to<X>(string);
+ string to_string(X);
+ X& string_into(string,X &); // note: returns the same ref you passed in, for convenience of use
+
+ default implementation via stringstreams (quite slow, I'm sure)
+
+ fast implementation for string, int<->string, unsigned<->string, float<->string, double<->string
+
+*/
+
+#ifndef USE_FTOA
+#define USE_FTOA 1
+#endif
+#ifndef HAVE_STRTOUL
+# define HAVE_STRTOUL 1
+#endif
+
+#include <string>
+#include <sstream>
+#include <stdexcept>
+#include <cstdlib>
+
+#include "have_64_bits.h"
+#include "utoa.h"
+#if USE_FTOA
+# include "ftoa.h"
+#endif
+
+namespace {
+// for faster numeric to/from string. TODO: separate into optional header
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h> // access to evil (fast) C isspace etc.
+#include <limits.h> //strtoul
+}
+
+inline void throw_string_to(std::string const& msg,char const* prefix="string_to: ") {
+ throw std::runtime_error(prefix+msg);
+}
+
+template <class I,class To>
+bool try_stream_into(I & i,To &to,bool complete=true)
+{
+ i >> to;
+ if (i.fail()) return false;
+ if (complete) {
+ char c;
+ return !(i >> c);
+ }
+ return true;
+}
+
+template <class Str,class To>
+bool try_string_into(Str const& str,To &to,bool complete=true)
+{
+ std::istringstream i(str);
+ return try_stream_into(i,to,complete);
+}
+
+template <class Str,class Data> inline
+Data & string_into(const Str &str,Data &data)
+{
+ if (!try_string_into(str,data))
+ throw std::runtime_error(std::string("Couldn't convert (string_into): ")+str);
+ return data;
+}
+
+
+template <class Data,class Str> inline
+Data string_to(const Str &str)
+{
+ Data ret;
+ string_into(str,ret);
+ return ret;
+}
+
+template <class D> inline
+std::string to_string(D const &d)
+{
+ std::ostringstream o;
+ o << d;
+ return o.str();
+}
+
+inline std::string to_string(unsigned x) {
+ return utos(x);
+}
+
+inline std::string to_string(int x) {
+ return itos(x);
+}
+
+inline long strtol_complete(char const* s,int base=10) {
+ char *e;
+ if (*s) {
+ long r=strtol(s,&e,base);
+ char c=*e;
+ if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos. TODO: loop consuming spaces until end?
+ return r;
+ }
+ throw_string_to(s,"Couldn't convert to integer: ");
+}
+
+// returns -INT_MAX or INT_MAX if number is too large/small
+inline int strtoi_complete_bounded(char const* s,int base=10) {
+ long l=strtol_complete(s,base);
+ if (l<std::numeric_limits<int>::min())
+ return std::numeric_limits<int>::min();
+ if (l>std::numeric_limits<int>::max())
+ return std::numeric_limits<int>::max();
+ return l;
+}
+#define RANGE_STR(x) #x
+#ifdef INT_MIN
+# define INTRANGE_STR "[" RANGE_STR(INT_MIN) "," RANGE_STR(INT_MAX) "]"
+#else
+# define INTRANGE_STR "[-2137483648,2147483647]"
+#endif
+
+ // throw if out of int range
+inline int strtoi_complete_exact(char const* s,int base=10) {
+ long l=strtol_complete(s,base);
+ if (l<std::numeric_limits<int>::min() || l>std::numeric_limits<int>::max())
+ throw_string_to(s,"Out of range for int " INTRANGE_STR ": ");
+ return l;
+}
+
+#if HAVE_LONGER_LONG
+inline int& string_into(std::string const& s,int &x) {
+ x=strtoi_complete_exact(s.c_str());
+ return x;
+}
+inline int& string_into(char const* s,int &x) {
+ x=strtoi_complete_exact(s);
+ return x;
+}
+#endif
+
+inline long& string_into(std::string const& s,long &x) {
+ x=strtol_complete(s.c_str());
+ return x;
+}
+inline long& string_into(char const* s,long &x) {
+ x=strtol_complete(s);
+ return x;
+}
+
+
+//FIXME: preprocessor separation for tokens int<->unsigned int, long<->unsigned long, strtol<->strtoul ? massive code duplication
+inline unsigned long strtoul_complete(char const* s,int base=10) {
+ char *e;
+ if (*s) {
+#if HAVE_STRTOUL
+ unsigned long r=strtoul(s,&e,base);
+#else
+// unsigned long r=strtol(s,&e,base); //FIXME: not usually safe
+ unsigned long r;
+ sscanf(s,"%ul",&r);
+#endif
+ char c=*e;
+ if (!c || isspace(c)) //simplifying assumption: we're happy if there's other stuff in the string, so long as the number ends in a space or eos. TODO: loop consuming spaces until end?
+ return r;
+ }
+ throw_string_to(s,"Couldn't convert to integer: ");
+}
+
+inline unsigned strtou_complete_bounded(char const* s,int base=10) {
+ unsigned long l=strtoul_complete(s,base);
+ if (l<std::numeric_limits<unsigned>::min())
+ return std::numeric_limits<unsigned>::min();
+ if (l>std::numeric_limits<unsigned>::max())
+ return std::numeric_limits<unsigned>::max();
+ return l;
+}
+
+#ifdef UINT_MIN
+# define UINTRANGE_STR "[" RANGE_STR(UINT_MIN) "," RANGE_STR(UINT_MAX) "]"
+#else
+# define UINTRANGE_STR "[0,4,294,967,295]"
+#endif
+
+ // throw if out of int range
+inline unsigned strtou_complete_exact(char const* s,int base=10) {
+ unsigned long l=strtoul_complete(s,base);
+ if (l<std::numeric_limits<unsigned>::min() || l>std::numeric_limits<unsigned>::max())
+ throw_string_to(s,"Out of range for uint " UINTRANGE_STR ": ");
+ return l;
+}
+
+#if HAVE_LONGER_LONG
+inline unsigned& string_into(std::string const& s,unsigned &x) {
+ x=strtou_complete_exact(s.c_str());
+ return x;
+}
+inline unsigned& string_into(char const* s,unsigned &x) {
+ x=strtou_complete_exact(s);
+ return x;
+}
+#endif
+
+inline unsigned long& string_into(std::string const& s,unsigned long &x) {
+ x=strtoul_complete(s.c_str());
+ return x;
+}
+inline unsigned long& string_into(char const* s,unsigned long &x) {
+ x=strtoul_complete(s);
+ return x;
+}
+
+//FIXME: end code duplication
+
+
+/* 9 decimal places needed to avoid rounding error in float->string->float. 17 for double->string->double
+ in terms of usable decimal places, there are 6 for float and 15 for double
+ */
+inline std::string to_string_roundtrip(float x) {
+ char buf[17];
+ return std::string(buf,buf+sprintf(buf,"%.9g",x));
+}
+inline std::string to_string(float x) {
+#if USE_FTOA
+ return ftos(x);
+#else
+ char buf[15];
+ return std::string(buf,buf+sprintf(buf,"%.7g",x));
+#endif
+}
+inline std::string to_string_roundtrip(double x) {
+ char buf[32];
+ return std::string(buf,buf+sprintf(buf,"%.17g",x));
+}
+inline std::string to_string(double x) {
+#if USE_FTOA
+ return ftos(x);
+#else
+ char buf[30];
+ return std::string(buf,buf+sprintf(buf,"%.15g",x));
+#endif
+}
+
+inline double& string_into(char const* s,double &x) {
+ x=std::atof(s);
+ return x;
+}
+inline float& string_into(char const* s,float &x) {
+ x=std::atof(s);
+ return x;
+}
+
+inline double& string_into(std::string const& s,double &x) {
+ x=std::atof(s.c_str());
+ return x;
+}
+inline float& string_into(std::string const& s,float &x) {
+ x=std::atof(s.c_str());
+ return x;
+}
+
+
+template <class Str>
+bool try_string_into(Str const& str,Str &to,bool complete=true)
+{
+ str=to;
+ return true;
+}
+
+inline std::string const& to_string(std::string const& d)
+{
+ return d;
+}
+
+template <class Str>
+Str const& string_to(Str const &s)
+{
+ return s;
+}
+
+template <class Str>
+Str & string_into(Str const &s,Str &d)
+{
+ return d=s;
+}
+
+/*
+
+template <class Str,class Data,class size_type> inline
+void substring_into(const Str &str,size_type pos,size_type n,Data &data)
+{
+// std::istringstream i(str,pos,n); // doesn't exist!
+ std::istringstream i(str.substr(pos,n));
+ if (!(i>>*data))
+ throw std::runtime_error("Couldn't convert (string_into): "+str);
+}
+
+template <class Data,class Str,class size_type> inline
+Data string_to(const Str &str,size_type pos,size_type n)
+{
+ Data ret;
+ substring_into(str,pos,n,ret);
+ return ret;
+}
+
+*/
+
+
+
+#endif
diff --git a/utils/warning_compiler.h b/utils/warning_compiler.h
new file mode 100644
index 00000000..2052cff3
--- /dev/null
+++ b/utils/warning_compiler.h
@@ -0,0 +1,15 @@
+#ifndef WARNING_COMPILER_HPP
+#define WARNING_COMPILER_HPP
+
+#ifndef HAVE_GCC_4_4
+#undef HAVE_DIAGNOSTIC_PUSH
+#if __GNUC__ > 4 || __GNUC__==4 && __GNUC_MINOR__ > 3
+# define HAVE_GCC_4_4 1
+# define HAVE_DIAGNOSTIC_PUSH 1
+#else
+# define HAVE_GCC_4_4 0
+# define HAVE_DIAGNOSTIC_PUSH 0
+#endif
+#endif
+
+#endif
diff --git a/utils/warning_pop.h b/utils/warning_pop.h
new file mode 100644
index 00000000..2be5d9ed
--- /dev/null
+++ b/utils/warning_pop.h
@@ -0,0 +1,8 @@
+#ifdef _MSC_VER
+#pragma warning( pop )
+#else
+#include "warning_compiler.h"
+#if HAVE_DIAGNOSTIC_PUSH
+# pragma GCC diagnostic pop
+#endif
+#endif
diff --git a/utils/warning_push.h b/utils/warning_push.h
new file mode 100644
index 00000000..086fd524
--- /dev/null
+++ b/utils/warning_push.h
@@ -0,0 +1,8 @@
+#ifdef _MSC_VER
+#pragma warning( pop )
+#else
+#include "warning_compiler.h"
+#if HAVE_DIAGNOSTIC_PUSH
+# pragma GCC diagnostic push
+#endif
+#endif