From 282bc08647eb6856f798da0c64dd67530e02842a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 21 Apr 2011 00:52:17 -0400 Subject: beginning of fast sparse vector impl --- utils/fast_sparse_vector.h | 135 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 utils/fast_sparse_vector.h (limited to 'utils/fast_sparse_vector.h') diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h new file mode 100644 index 00000000..91bf441d --- /dev/null +++ b/utils/fast_sparse_vector.h @@ -0,0 +1,135 @@ +#ifndef _FAST_SPARSE_VECTOR_H_ +#define _FAST_SPARSE_VECTOR_H_ + +#include +#include + +#include + +// these are architecture dependent, they should be +// detected in some way but it's probably easiest (for me) +// to just set them +#define L2_CACHE_LINE 128 + +// this should just be a typedef to pair on the new c++ +template +struct PairIntFloat { + int first; + T second; + const PairIntFloat& operator=(const std::pair& v) { + first = v.first; + second = v.second; + return *this; + } + operator const std::pair&() const { + return *reinterpret_cast*>(this); + } +}; +BOOST_STATIC_ASSERT(sizeof(PairIntFloat) == sizeof(std::pair)); + +template +class FastSparseVector { + public: + FastSparseVector() : local_size_(0), is_local_(true) {} + ~FastSparseVector() { + if (!is_local_) delete data_.rbmap; + } + FastSparseVector(const FastSparseVector& other) { + memcpy(this, &other, sizeof(FastSparseVector)); + if (is_local_) return; + data_.rbmap = new std::map(*data_.rbmap); + } + const FastSparseVector& operator=(const FastSparseVector& other) { + if (!is_local_) delete data_.rbmap; + memcpy(this, &other, sizeof(FastSparseVector)); + if (is_local_) return *this; + data_.rbmap = new std::map(*data_.rbmap); + return *this; + } + inline void set_value(int k, const T& v) { + get_or_create_bin(k) = v; + } + inline T value(int k) const { + if (is_local_) { + for (int i = 0; i < local_size_; ++i) { + const PairIntFloat& p = data_.local[i]; + if (p.first == k) return p.second; + } + } else { + std::map::const_iterator it = data_.rbmap->find(k); + if (it != data_.rbmap->end()) return it->second; + } + return T(); + } + inline size_t size() const { + if (is_local_) return local_size_; + return data_.rbmap->size(); + } + inline void clear() { + if (!is_local_) delete data_.rbmap; + local_size_ = 0; + } + inline bool empty() const { + return size() == 0; + } + inline FastSparseVector& operator+=(const FastSparseVector& other) { + if (!is_local_) { + } else if (!other.is_local_) { + } else { // both local + } + return *this; + } + private: + inline T& get_or_create_bin(int k) { + if (is_local_) { + for (int i = 0; i < local_size_; ++i) + if (data_.local[i].first == k) return data_.local[i].second; + } else { + return (*data_.rbmap)[k]; + } + assert(is_local_); + // currently local! + if (local_size_ < LOCAL_MAX) { + PairIntFloat& p = data_.local[local_size_]; + ++local_size_; + p.first = k; + return p.second; + } else { + swap_local_rbmap(); + return (*data_.rbmap)[k]; + } + } + void swap_local_rbmap() { + if (is_local_) { // data is local, move to rbmap + std::map* m = new std::map(&data_.local[0], &data_.local[local_size_]); + data_.rbmap = m; + is_local_ = false; + } else { // data is in rbmap, move to local + assert(data_.rbmap->size() < LOCAL_MAX); + const std::map* m = data_.rbmap; + local_size_ = m->size(); + int i = 0; + for (typename std::map::const_iterator it = m->begin(); + it != m->end(); ++it) { + data_.local[i] = *it; + ++i; + } + is_local_ = true; + } + } + + union { + PairIntFloat local[LOCAL_MAX]; + std::map* rbmap; + } data_; + unsigned char local_size_; + bool is_local_; +}; + +namespace performance_checks { + // if you get a failure on the next line, you should adjust LOCAL_MAX for + // your architecture + BOOST_STATIC_ASSERT(sizeof(FastSparseVector) == L2_CACHE_LINE); +}; + +#endif -- cgit v1.2.3