#ifndef _FAST_SPARSE_VECTOR_H_ #define _FAST_SPARSE_VECTOR_H_ // FastSparseVector is a integer indexed unordered map that supports very fast // (mathematical) vector operations when the sizes are very small, and reasonably // fast operations when the sizes are large. // important: indexes are integers // important: iterators may return elements in any order #include #include #include #include #include #include // this is architecture dependent, it should be // detected in some way but it's probably easiest (for me) // to just set it #define L2_CACHE_LINE 128 // this should just be a typedef to pair on the new c++ template struct PairIntT { int first; T second; const PairIntT& operator=(const std::pair& v) { first = v.first; second = v.second; return *this; } operator const std::pair&() const { return *reinterpret_cast*>(this); } }; BOOST_STATIC_ASSERT(sizeof(PairIntT) == sizeof(std::pair)); template class FastSparseVector { public: struct const_iterator { const_iterator(const FastSparseVector& v, const bool is_end) : local_(v.is_local_) { if (local_) { local_it_ = &v.data_.local[is_end ? v.local_size_ : 0]; } else { if (is_end) remote_it_ = v.data_.rbmap->end(); else remote_it_ = v.data_.rbmap->begin(); } } const bool local_; const PairIntT* local_it_; typename std::map::const_iterator remote_it_; const std::pair& operator*() const { if (local_) return *reinterpret_cast*>(local_it_); else return *remote_it_; } const std::pair* operator->() const { if (local_) return reinterpret_cast*>(local_it_); else return &*remote_it_; } const_iterator& operator++() { if (local_) ++local_it_; else ++remote_it_; return *this; } inline bool operator==(const const_iterator& o) const { if (o.local_ != local_) return false; if (local_) { return local_it_ == o.local_it_; } else { return remote_it_ == o.remote_it_; } } inline bool operator!=(const const_iterator& o) const { return !(o == *this); } }; public: FastSparseVector() : local_size_(0), is_local_(true) {} ~FastSparseVector() { if (!is_local_) delete data_.rbmap; } FastSparseVector(const FastSparseVector& other) { std::memcpy(this, &other, sizeof(FastSparseVector)); if (is_local_) return; data_.rbmap = new std::map(*data_.rbmap); } const FastSparseVector& operator=(const FastSparseVector& other) { if (!is_local_) delete data_.rbmap; std::memcpy(this, &other, sizeof(FastSparseVector)); if (is_local_) return *this; data_.rbmap = new std::map(*data_.rbmap); return *this; } inline void set_value(int k, const T& v) { get_or_create_bin(k) = v; } inline T value(int k) const { if (is_local_) { for (int i = 0; i < local_size_; ++i) { const PairIntT& p = data_.local[i]; if (p.first == k) return p.second; } } else { typename std::map::const_iterator it = data_.rbmap->find(k); if (it != data_.rbmap->end()) return it->second; } return T(); } inline size_t size() const { if (is_local_) return local_size_; return data_.rbmap->size(); } inline void clear() { if (!is_local_) delete data_.rbmap; local_size_ = 0; } inline bool empty() const { return size() == 0; } inline FastSparseVector& operator+=(const FastSparseVector& other) { if (empty()) { *this = other; return *this; } const typename FastSparseVector::const_iterator end = other.end(); for (typename FastSparseVector::const_iterator it = other.begin(); it != end; ++it) { get_or_create_bin(it->first) += it->second; } return *this; } inline FastSparseVector& operator-=(const FastSparseVector& other) { const typename FastSparseVector::const_iterator end = other.end(); for (typename FastSparseVector::const_iterator it = other.begin(); it != end; ++it) { get_or_create_bin(it->first) -= it->second; } return *this; } inline FastSparseVector& operator*=(const T& scalar) { if (is_local_) { for (int i = 0; i < local_size_; ++i) data_.local[i].second *= scalar; } else { const typename std::map::iterator end = data_.rbmap->end(); for (typename std::map::iterator it = data_.rbmap->begin(); it != end; ++it) it->second *= scalar; } return *this; } inline FastSparseVector& operator/=(const T& scalar) { if (is_local_) { for (int i = 0; i < local_size_; ++i) data_.local[i].second /= scalar; } else { const typename std::map::iterator end = data_.rbmap->end(); for (typename std::map::iterator it = data_.rbmap->begin(); it != end; ++it) it->second /= scalar; } return *this; } const_iterator begin() const { return const_iterator(*this, false); } const_iterator end() const { return const_iterator(*this, true); } void init_vector(std::vector *vp) const { init_vector(*vp); } void init_vector(std::vector &v) const { v.clear(); for (const_iterator i=begin(),e=end();i!=e;++i) extend_vector(v,i->first)=i->second; } T dot(const std::vector& v) const { T res = T(); for (const_iterator it = begin(), e = end(); it != e; ++it) if (it->first < v.size()) res += it->second * v[it->first]; } private: inline T& extend_vector(std::vector &v,int i) { if (i>=v.size()) v.resize(i+1); return v[i]; } inline T& get_or_create_bin(int k) { if (is_local_) { for (int i = 0; i < local_size_; ++i) if (data_.local[i].first == k) return data_.local[i].second; } else { return (*data_.rbmap)[k]; } assert(is_local_); // currently local! if (local_size_ < LOCAL_MAX) { PairIntT& p = data_.local[local_size_]; ++local_size_; p.first = k; return p.second; } else { swap_local_rbmap(); return (*data_.rbmap)[k]; } } void swap_local_rbmap() { if (is_local_) { // data is local, move to rbmap std::map* m = new std::map(&data_.local[0], &data_.local[local_size_]); data_.rbmap = m; is_local_ = false; } else { // data is in rbmap, move to local assert(data_.rbmap->size() < LOCAL_MAX); const std::map* m = data_.rbmap; local_size_ = m->size(); int i = 0; for (typename std::map::const_iterator it = m->begin(); it != m->end(); ++it) { data_.local[i] = *it; ++i; } is_local_ = true; } } union { PairIntT local[LOCAL_MAX]; std::map* rbmap; } data_; unsigned char local_size_; bool is_local_; }; template const FastSparseVector operator+(const FastSparseVector& x, const FastSparseVector& y) { if (x.size() > y.size()) { FastSparseVector res(x); res += y; return res; } else { FastSparseVector res(y); res += x; return res; } } template const FastSparseVector operator-(const FastSparseVector& x, const FastSparseVector& y) { if (x.size() > y.size()) { FastSparseVector res(x); res -= y; return res; } else { FastSparseVector res(y); res -= x; return res; } } namespace performance_checks { // if you get a failure on the next line, you should adjust LOCAL_MAX for // your architecture BOOST_STATIC_ASSERT(sizeof(FastSparseVector) == L2_CACHE_LINE); }; #include "fdict.h" template inline void print(O &o,const FastSparseVector& v, const char* kvsep="=",const char* pairsep=" ",const char* pre="",const char* post="") { o << pre; bool first=true; for (typename FastSparseVector::const_iterator i=v.begin(),e=v.end();i!=e;++i) { if (first) first=false; else o<first)<second; } o << post; } template inline std::ostream& operator<<(std::ostream& out, const FastSparseVector& v) { print(out, v); return out; } #endif