summaryrefslogtreecommitdiff
path: root/utils/fast_sparse_vector.h
diff options
context:
space:
mode:
Diffstat (limited to 'utils/fast_sparse_vector.h')
-rw-r--r--utils/fast_sparse_vector.h135
1 files changed, 135 insertions, 0 deletions
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
new file mode 100644
index 00000000..91bf441d
--- /dev/null
+++ b/utils/fast_sparse_vector.h
@@ -0,0 +1,135 @@
+#ifndef _FAST_SPARSE_VECTOR_H_
+#define _FAST_SPARSE_VECTOR_H_
+
+#include <map>
+#include <cassert>
+
+#include <boost/static_assert.hpp>
+
+// these are architecture dependent, they should be
+// detected in some way but it's probably easiest (for me)
+// to just set them
+#define L2_CACHE_LINE 128
+
+// this should just be a typedef to pair<int,float> on the new c++
+template <typename T>
+struct PairIntFloat {
+ int first;
+ T second;
+ const PairIntFloat& operator=(const std::pair<const int, float>& v) {
+ first = v.first;
+ second = v.second;
+ return *this;
+ }
+ operator const std::pair<const int, float>&() const {
+ return *reinterpret_cast<const std::pair<const int, float>*>(this);
+ }
+};
+BOOST_STATIC_ASSERT(sizeof(PairIntFloat<float>) == sizeof(std::pair<int,float>));
+
+template <typename T, int LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15 : 7)>
+class FastSparseVector {
+ public:
+ FastSparseVector() : local_size_(0), is_local_(true) {}
+ ~FastSparseVector() {
+ if (!is_local_) delete data_.rbmap;
+ }
+ FastSparseVector(const FastSparseVector& other) {
+ memcpy(this, &other, sizeof(FastSparseVector));
+ if (is_local_) return;
+ data_.rbmap = new std::map<int, T>(*data_.rbmap);
+ }
+ const FastSparseVector& operator=(const FastSparseVector& other) {
+ if (!is_local_) delete data_.rbmap;
+ memcpy(this, &other, sizeof(FastSparseVector));
+ if (is_local_) return *this;
+ data_.rbmap = new std::map<int, T>(*data_.rbmap);
+ return *this;
+ }
+ inline void set_value(int k, const T& v) {
+ get_or_create_bin(k) = v;
+ }
+ inline T value(int k) const {
+ if (is_local_) {
+ for (int i = 0; i < local_size_; ++i) {
+ const PairIntFloat<T>& p = data_.local[i];
+ if (p.first == k) return p.second;
+ }
+ } else {
+ std::map<int, float>::const_iterator it = data_.rbmap->find(k);
+ if (it != data_.rbmap->end()) return it->second;
+ }
+ return T();
+ }
+ inline size_t size() const {
+ if (is_local_) return local_size_;
+ return data_.rbmap->size();
+ }
+ inline void clear() {
+ if (!is_local_) delete data_.rbmap;
+ local_size_ = 0;
+ }
+ inline bool empty() const {
+ return size() == 0;
+ }
+ inline FastSparseVector& operator+=(const FastSparseVector& other) {
+ if (!is_local_) {
+ } else if (!other.is_local_) {
+ } else { // both local
+ }
+ return *this;
+ }
+ private:
+ inline T& get_or_create_bin(int k) {
+ if (is_local_) {
+ for (int i = 0; i < local_size_; ++i)
+ if (data_.local[i].first == k) return data_.local[i].second;
+ } else {
+ return (*data_.rbmap)[k];
+ }
+ assert(is_local_);
+ // currently local!
+ if (local_size_ < LOCAL_MAX) {
+ PairIntFloat<T>& p = data_.local[local_size_];
+ ++local_size_;
+ p.first = k;
+ return p.second;
+ } else {
+ swap_local_rbmap();
+ return (*data_.rbmap)[k];
+ }
+ }
+ void swap_local_rbmap() {
+ if (is_local_) { // data is local, move to rbmap
+ std::map<int, T>* m = new std::map<int, T>(&data_.local[0], &data_.local[local_size_]);
+ data_.rbmap = m;
+ is_local_ = false;
+ } else { // data is in rbmap, move to local
+ assert(data_.rbmap->size() < LOCAL_MAX);
+ const std::map<int, T>* m = data_.rbmap;
+ local_size_ = m->size();
+ int i = 0;
+ for (typename std::map<int, T>::const_iterator it = m->begin();
+ it != m->end(); ++it) {
+ data_.local[i] = *it;
+ ++i;
+ }
+ is_local_ = true;
+ }
+ }
+
+ union {
+ PairIntFloat<T> local[LOCAL_MAX];
+ std::map<int, T>* rbmap;
+ } data_;
+ unsigned char local_size_;
+ bool is_local_;
+};
+
+namespace performance_checks {
+ // if you get a failure on the next line, you should adjust LOCAL_MAX for
+ // your architecture
+ BOOST_STATIC_ASSERT(sizeof(FastSparseVector<float>) == L2_CACHE_LINE);
+};
+
+#endif