#ifndef _SPARSE_VECTOR_H_ #define _SPARSE_VECTOR_H_ /* TODO: use dense_hash_map for sparsevector use SparseVectorList (pair smallvector) for feat funcs / hypergraphs (you rarely need random access; just append a feature to the list) */ /* hack: index 0 never gets printed because cdyer is creative and efficient. features which have no weight got feature dict id 0, see, and the models all clobered that value. nobody wants to see it. except that vlad is also creative and efficient and stored the oracle bleu there. */ // this is a modified version of code originally written // by Phil Blunsom #include #include #include #include #include #include "fdict.h" #include "small_vector.h" template inline T & extend_vector(std::vector &v,int i) { if (i>=v.size()) v.resize(i+1); return v[i]; } template class SparseVector { public: typedef std::map MapType; typedef typename std::map::const_iterator const_iterator; SparseVector() {} explicit SparseVector(std::vector const& v) { typename MapType::iterator p=values_.begin(); const T z=T(0); for (unsigned i=0;i *vp) const { init_vector(*vp); } void init_vector(std::vector &v) const { v.clear(); for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) extend_vector(v,i->first)=i->second; } void set_new_value(int index, T const& val) { assert(values_.find(index)==values_.end()); values_[index]=val; } // warning: exploits the fact that 0 values are always removed from map. change this if you change that. bool nonzero(int index) const { return values_.find(index) != values_.end(); } const T operator[](int index) const { typename MapType::const_iterator found = values_.find(index); if (found == values_.end()) return T(0); else return found->second; } void set_value(int index, const T &value) { values_[index] = value; } T add_value(int index, const T &value) { return values_[index] += value; } T value(int index) const { typename MapType::const_iterator found = values_.find(index); if (found != values_.end()) return found->second; else return T(0); } void store(std::valarray* target) const { (*target) *= 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { if (it->first >= target->size()) break; (*target)[it->first] = it->second; } } int max_index() const { if (empty()) return 0; typename MapType::const_iterator found =values_.end(); --found; return found->first; } // dot product with a unit vector of the same length // as the sparse vector T dot() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second; return sum; } template S cosine_sim(const SparseVector &vec) const { return dot(vec)/(l2norm()*vec.l2norm()); } // if values are binary, gives |A intersect B|/|A union B| template S tanimoto_coef(const SparseVector &vec) const { S dp=dot(vec); return dp/(l2norm_sq()+vec.l2norm_sq()-dp); } template S dot(const SparseVector &vec) const { S sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { typename MapType::const_iterator found = vec.values_.find(it->first); if (found != vec.values_.end()) sum += it->second * found->second; } return sum; } template S dot(const std::vector &vec) const { S sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { if (it->first < static_cast(vec.size())) sum += it->second * vec[it->first]; } return sum; } template S dot(const S *vec) const { // this is not range checked! S sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * vec[it->first]; std::cout << "dot(*vec) " << sum << std::endl; return sum; } T l1norm() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += fabs(it->second); return sum; } T l2norm_sq() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * it->second; return sum; } T l2norm() const { return sqrt(l2norm_sq()); } SparseVector &operator+=(const SparseVector &other) { for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { T v = (values_[it->first] += it->second); if (v == T()) values_.erase(it->first); } return *this; } SparseVector &operator-=(const SparseVector &other) { for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { T v = (values_[it->first] -= it->second); if (v == T(0)) values_.erase(it->first); } return *this; } friend SparseVector operator -(SparseVector x,SparseVector const& y) { x-=y; return x; } friend SparseVector operator +(SparseVector x,SparseVector const& y) { x+=y; return x; } private: // DEPRECATED: becuase 0 values are dropped from the map, this doesn't even make sense if you have a fully populated (not really sparse re: what you'll ever use) vector SparseVector &operator-=(T const& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second -= x; return *this; } SparseVector &operator+=(T const& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second += x; return *this; } public: SparseVector &operator/=(const T &x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second /= x; return *this; } SparseVector &operator*=(const T& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second *= x; return *this; } SparseVector operator+(T const& x) const { SparseVector result = *this; return result += x; } SparseVector operator-(T const& x) const { SparseVector result = *this; return result -= x; } SparseVector operator/(T const& x) const { SparseVector result = *this; return result /= x; } std::ostream &operator<<(std::ostream& out) const { Write(true, &out); return out; } void Write(const bool with_semi, std::ostream* os) const { bool first = true; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { // by definition feature id 0 is a dummy value if (it->first == 0) continue; if (with_semi) { (*os) << (first ? "" : ";") << FD::Convert(it->first) << '=' << it->second; } else { (*os) << (first ? "" : " ") << FD::Convert(it->first) << '=' << it->second; } first = false; } } bool operator<(const SparseVector &other) const { typename MapType::const_iterator it = values_.begin(); typename MapType::const_iterator other_it = other.values_.begin(); for (; it != values_.end() && other_it != other.values_.end(); ++it, ++other_it) { if (it->first < other_it->first) return true; if (it->first > other_it->first) return false; if (it->second < other_it->second) return true; if (it->second > other_it->second) return false; } return values_.size() < other.values_.size(); } int size() const { return values_.size(); } int num_active() const { return values_.size(); } bool empty() const { return values_.empty(); } const_iterator begin() const { return values_.begin(); } const_iterator end() const { return values_.end(); } void clear() { values_.clear(); } void clear_value(int index) { values_.erase(index); } void swap(SparseVector& other) { values_.swap(other.values_); } private: MapType values_; }; // doesn't support fast indexing directly template class SparseVectorList { typedef typename std::pair Pair; typedef SmallVector List; typedef typename List::const_iterator const_iterator; SparseVectorList() { } template SparseVectorList(I i,I const& end) { const T z=T(0); int c=0; for (;i const& v) { const T z=T(0); for (unsigned i=0;i FeatureVector; typedef SparseVector WeightVector; typedef std::vector DenseWeightVector; template SparseVector operator+(const SparseVector& a, const SparseVector& b) { SparseVector result = a; return result += b; } template SparseVector operator*(const SparseVector& a, const double& b) { SparseVector result = a; return result *= b; } template SparseVector operator*(const SparseVector& a, const T& b) { SparseVector result = a; return result *= b; } template SparseVector operator*(const double& a, const SparseVector& b) { SparseVector result = b; return result *= a; } template std::ostream &operator<<(std::ostream &out, const SparseVector &vec) { return vec.operator<<(out); } namespace B64 { void Encode(double objective, const SparseVector& v, std::ostream* out); // returns false if failed to decode bool Decode(double* objective, SparseVector* v, const char* data, size_t size); } #endif