From a2d9d0f96502c7d3c04303f3db36a8602d992287 Mon Sep 17 00:00:00 2001 From: graehl Date: Fri, 23 Jul 2010 19:17:22 +0000 Subject: sparse_vector use google::dense_hash_map, fsa scan logging git-svn-id: https://ws10smt.googlecode.com/svn/trunk@383 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/sparse_vector.h | 100 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 77 insertions(+), 23 deletions(-) (limited to 'decoder/sparse_vector.h') diff --git a/decoder/sparse_vector.h b/decoder/sparse_vector.h index b42e001a..0f3724f0 100644 --- a/decoder/sparse_vector.h +++ b/decoder/sparse_vector.h @@ -1,6 +1,17 @@ #ifndef _SPARSE_VECTOR_H_ #define _SPARSE_VECTOR_H_ -/* TODO: use dense_hash_map for sparsevector + +#define SPARSE_VECTOR_HASH + +#ifdef SPARSE_VECTOR_HASH +#include "hash.h" +# define SPARSE_VECTOR_MAP HASH_MAP +# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) HASH_MAP_RESERVED(h,empty,deleted) +#else +# define SPARSE_VECTOR_MAP std::map +# define SPARSE_VECTOR_MAP_RESERVED(h,empty,deleted) +#endif +/* use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list) */ /* hack: index 0 never gets printed because cdyer is creative and efficient. features which have no weight got feature dict id 0, see, and the models all clobered that value. nobody wants to see it. except that vlad is also creative and efficient and stored the oracle bleu there. */ @@ -27,21 +38,28 @@ inline T & extend_vector(std::vector &v,int i) { template class SparseVector { + void init_reserved() { + SPARSE_VECTOR_MAP_RESERVED(values_,-1,-2); + } public: - typedef std::map MapType; - typedef typename std::map::const_iterator const_iterator; - SparseVector() {} + typedef SparseVector Self; + typedef SPARSE_VECTOR_MAP MapType; + typedef typename MapType::const_iterator const_iterator; + SparseVector() { + init_reserved(); + } explicit SparseVector(std::vector const& v) { + init_reserved(); typename MapType::iterator p=values_.begin(); - const T z=T(0); + const T z=0; for (unsigned i=0;i *vp) const { init_vector(*vp); } @@ -52,7 +70,6 @@ public: extend_vector(v,i->first)=i->second; } - void set_new_value(int index, T const& val) { assert(values_.find(index)==values_.end()); values_[index]=val; @@ -65,10 +82,10 @@ public: } - const T operator[](int index) const { + T operator[](int index) const { typename MapType::const_iterator found = values_.find(index); if (found == values_.end()) - return T(0); + return 0; else return found->second; } @@ -77,8 +94,11 @@ public: values_[index] = value; } - T add_value(int index, const T &value) { - return values_[index] += value; + T const& add_value(int index, const T &value) { + std::pair art=values_.insert(std::make_pair(index,value)); + T &val=art.first->second; + if (!art.second) val += value; // already existed + return val; } T value(int index) const { @@ -86,7 +106,7 @@ public: if (found != values_.end()) return found->second; else - return T(0); + return 0; } void store(std::valarray* target) const { @@ -184,13 +204,20 @@ public: return sqrt(l2norm_sq()); } + void erase(int key) { + values_.erase(key); +/* typename MapType::iterator found = values_.find(key); + if (found!=values_end()) + values_.erase(found);*/ + } + SparseVector &operator+=(const SparseVector &other) { for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { - T v = (values_[it->first] += it->second); - if (v == T()) - values_.erase(it->first); +// T v = + (values_[it->first] += it->second); +// if (v == 0) values_.erase(it->first); } return *this; } @@ -199,9 +226,9 @@ public: for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { - T v = (values_[it->first] -= it->second); - if (v == T(0)) - values_.erase(it->first); +// T v = + (values_[it->first] -= it->second); +// if (v == 0) values_.erase(it->first); } return *this; } @@ -282,6 +309,35 @@ public: } } + bool operator==(Self const & other) const { + return size()==other.size() && contains_keys_of(other) && other.contains_i(*this); + } + + bool contains(Self const &o) const { + return size()>o.size() && contains(o); + } + + bool at_equals(int i,T const& val) const { + const_iterator it=values_.find(i); + if (it==values_.end()) return val==0; + return it->second==val; + } + + bool contains_i(Self const& o) const { + for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i) + if (!at_equals(i->first,i->second)) + return false; + return true; + } + + bool contains_keys_of(Self const& o) const { + for (typename MapType::const_iterator i=o.begin(),e=o.end();i!=e;++i) + if (values_.find(i)==values_.end()) + return false; + return true; + } + +#ifndef SPARSE_VECTOR_HASH bool operator<(const SparseVector &other) const { typename MapType::const_iterator it = values_.begin(); typename MapType::const_iterator other_it = other.values_.begin(); @@ -295,6 +351,7 @@ public: } return values_.size() < other.values_.size(); } +#endif int size() const { return values_.size(); } @@ -307,9 +364,6 @@ public: void clear() { values_.clear(); } - void clear_value(int index) { - values_.erase(index); - } void swap(SparseVector& other) { values_.swap(other.values_); @@ -328,7 +382,7 @@ class SparseVectorList { SparseVectorList() { } template SparseVectorList(I i,I const& end) { - const T z=T(0); + const T z=0; int c=0; for (;i const& v) { - const T z=T(0); + const T z=0; for (unsigned i=0;i