#ifndef _SPARSE_VECTOR_H_ #define _SPARSE_VECTOR_H_ // this is a modified version of code originally written // by Phil Blunsom #include #include #include #include #include #include "fdict.h" #include "small_vector.h" template inline T & extend_vector(std::vector &v,int i) { if (i>=v.size()) v.resize(i+1); return v[i]; } template class SparseVector { public: typedef std::map MapType; typedef typename std::map::const_iterator const_iterator; SparseVector() {} explicit SparseVector(std::vector const& v) { typename MapType::iterator p=values_.begin(); const T z=T(0); for (unsigned i=0;i *vp) const { init_vector(*vp); } void init_vector(std::vector &v) const { v.clear(); for (const_iterator i=values_.begin(),e=values_.end();i!=e;++i) extend_vector(v,i->first)=i->second; } void set_new_value(int index, T const& val) { assert(values_.find(index)==values_.end()); values_[index]=val; } const T operator[](int index) const { typename MapType::const_iterator found = values_.find(index); if (found == values_.end()) return T(0); else return found->second; } void set_value(int index, const T &value) { values_[index] = value; } T add_value(int index, const T &value) { return values_[index] += value; } T value(int index) const { typename MapType::const_iterator found = values_.find(index); if (found != values_.end()) return found->second; else return T(0); } void store(std::valarray* target) const { (*target) *= 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { if (it->first >= target->size()) break; (*target)[it->first] = it->second; } } int max_index() const { if (empty()) return 0; typename MapType::const_iterator found =values_.end(); --found; return found->first; } // dot product with a unit vector of the same length // as the sparse vector T dot() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second; return sum; } template S cosine_sim(const SparseVector &vec) const { return dot(vec)/(l2norm()*vec.l2norm()); } // if values are binary, gives |A intersect B|/|A union B| template S tanimoto_coef(const SparseVector &vec) const { S dp=dot(vec); return dp/(l2norm_sq()+vec.l2norm_sq()-dp); } template S dot(const SparseVector &vec) const { S sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { typename MapType::const_iterator found = vec.values_.find(it->first); if (found != vec.values_.end()) sum += it->second * found->second; } return sum; } template S dot(const std::vector &vec) const { S sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { if (it->first < static_cast(vec.size())) sum += it->second * vec[it->first]; } return sum; } template S dot(const S *vec) const { // this is not range checked! S sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * vec[it->first]; std::cout << "dot(*vec) " << sum << std::endl; return sum; } T l1norm() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += fabs(it->second); return sum; } T l2norm_sq() const { T sum = 0; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) sum += it->second * it->second; return sum; } T l2norm() const { return sqrt(l2norm_sq()); } SparseVector &operator+=(const SparseVector &other) { for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { T v = (values_[it->first] += it->second); if (v == T()) values_.erase(it->first); } return *this; } SparseVector &operator-=(const SparseVector &other) { for (typename MapType::const_iterator it = other.values_.begin(); it != other.values_.end(); ++it) { T v = (values_[it->first] -= it->second); if (v == T(0)) values_.erase(it->first); } return *this; } friend SparseVector operator -(SparseVector x,SparseVector const& y) { x-=y; return x; } friend SparseVector operator +(SparseVector x,SparseVector const& y) { x+=y; return x; } private: // DEPRECATED: becuase 0 values are dropped from the map, this doesn't even make sense if you have a fully populated (not really sparse re: what you'll ever use) vector SparseVector &operator-=(T const& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second -= x; return *this; } SparseVector &operator+=(T const& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second += x; return *this; } public: SparseVector &operator/=(const T &x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second /= x; return *this; } SparseVector &operator*=(const T& x) { for (typename MapType::iterator it = values_.begin(); it != values_.end(); ++it) it->second *= x; return *this; } SparseVector operator+(T const& x) const { SparseVector result = *this; return result += x; } SparseVector operator-(T const& x) const { SparseVector result = *this; return result -= x; } SparseVector operator/(T const& x) const { SparseVector result = *this; return result /= x; } std::ostream &operator<<(std::ostream& out) const { Write(true, &out); return out; } void Write(const bool with_semi, std::ostream* os) const { bool first = true; for (typename MapType::const_iterator it = values_.begin(); it != values_.end(); ++it) { // by definition feature id 0 is a dummy value if (it->first == 0) continue; if (with_semi) { (*os) << (first ? "" : ";") << FD::Convert(it->first) << '=' << it->second; } else { (*os) << (first ? "" : " ") << FD::Convert(it->first) << '=' << it->second; } first = false; } } bool operator<(const SparseVector &other) const { typename MapType::const_iterator it = values_.begin(); typename MapType::const_iterator other_it = other.values_.begin(); for (; it != values_.end() && other_it != other.values_.end(); ++it, ++other_it) { if (it->first < other_it->first) return true; if (it->first > other_it->first) return false; if (it->second < other_it->second) return true; if (it->second > other_it->second) return false; } return values_.size() < other.values_.size(); } int size() const { return values_.size(); } int num_active() const { return values_.size(); } bool empty() const { return values_.empty(); } const_iterator begin() const { return values_.begin(); } const_iterator end() const { return values_.end(); } void clear() { values_.clear(); } void clear_value(int index) { values_.erase(index); } void swap(SparseVector& other) { values_.swap(other.values_); } private: MapType values_; }; // doesn't support fast indexing directly template class SparseVectorList { typedef typename std::pair Pair; typedef SmallVector List; typedef typename List::const_iterator const_iterator; SparseVectorList() { } template SparseVectorList(I i,I const& end) { const T z=T(0); int c=0; for (;i const& v) { const T z=T(0); for (unsigned i=0;i FeatureVector; typedef SparseVector WeightVector; typedef std::vector DenseWeightVector; template SparseVector operator+(const SparseVector& a, const SparseVector& b) { SparseVector result = a; return result += b; } template SparseVector operator*(const SparseVector& a, const double& b) { SparseVector result = a; return result *= b; } template SparseVector operator*(const SparseVector& a, const T& b) { SparseVector result = a; return result *= b; } template SparseVector operator*(const double& a, const SparseVector& b) { SparseVector result = b; return result *= a; } template std::ostream &operator<<(std::ostream &out, const SparseVector &vec) { return vec.operator<<(out); } namespace B64 { void Encode(double objective, const SparseVector& v, std::ostream* out); // returns false if failed to decode bool Decode(double* objective, SparseVector* v, const char* data, size_t size); } #endif