diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-04-21 00:52:17 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-04-21 00:52:17 -0400 |
commit | 282bc08647eb6856f798da0c64dd67530e02842a (patch) | |
tree | 97553e7e4208b9c0eb98e2168eb9faefc367ee66 | |
parent | af0526d5994d641352bdba499d9a74f9e4824b4f (diff) |
beginning of fast sparse vector impl
-rw-r--r-- | utils/Makefile.am | 9 | ||||
-rw-r--r-- | utils/fast_sparse_vector.h | 135 | ||||
-rw-r--r-- | utils/ts.cc | 36 |
3 files changed, 177 insertions, 3 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am index 9556f507..94f9be30 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,12 +1,14 @@ +noinst_PROGRAMS = ts +TESTS = ts + if HAVE_GTEST -noinst_PROGRAMS = \ +noinst_PROGRAMS += \ dict_test \ weights_test \ logval_test \ small_vector_test -TESTS = small_vector_test logval_test weights_test dict_test - +TESTS += small_vector_test logval_test weights_test dict_test endif noinst_LIBRARIES = libutils.a @@ -25,6 +27,7 @@ libutils_a_SOURCES = \ verbose.cc \ weights.cc +ts_SOURCES = ts.cc dict_test_SOURCES = dict_test.cc dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) weights_test_SOURCES = weights_test.cc diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h new file mode 100644 index 00000000..91bf441d --- /dev/null +++ b/utils/fast_sparse_vector.h @@ -0,0 +1,135 @@ +#ifndef _FAST_SPARSE_VECTOR_H_ +#define _FAST_SPARSE_VECTOR_H_ + +#include <map> +#include <cassert> + +#include <boost/static_assert.hpp> + +// these are architecture dependent, they should be +// detected in some way but it's probably easiest (for me) +// to just set them +#define L2_CACHE_LINE 128 + +// this should just be a typedef to pair<int,float> on the new c++ +template <typename T> +struct PairIntFloat { + int first; + T second; + const PairIntFloat& operator=(const std::pair<const int, float>& v) { + first = v.first; + second = v.second; + return *this; + } + operator const std::pair<const int, float>&() const { + return *reinterpret_cast<const std::pair<const int, float>*>(this); + } +}; +BOOST_STATIC_ASSERT(sizeof(PairIntFloat<float>) == sizeof(std::pair<int,float>)); + +template <typename T, int LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15 : 7)> +class FastSparseVector { + public: + FastSparseVector() : local_size_(0), is_local_(true) {} + ~FastSparseVector() { + if (!is_local_) delete data_.rbmap; + } + FastSparseVector(const FastSparseVector& other) { + memcpy(this, &other, sizeof(FastSparseVector)); + if (is_local_) return; + data_.rbmap = new std::map<int, T>(*data_.rbmap); + } + const FastSparseVector& operator=(const FastSparseVector& other) { + if (!is_local_) delete data_.rbmap; + memcpy(this, &other, sizeof(FastSparseVector)); + if (is_local_) return *this; + data_.rbmap = new std::map<int, T>(*data_.rbmap); + return *this; + } + inline void set_value(int k, const T& v) { + get_or_create_bin(k) = v; + } + inline T value(int k) const { + if (is_local_) { + for (int i = 0; i < local_size_; ++i) { + const PairIntFloat<T>& p = data_.local[i]; + if (p.first == k) return p.second; + } + } else { + std::map<int, float>::const_iterator it = data_.rbmap->find(k); + if (it != data_.rbmap->end()) return it->second; + } + return T(); + } + inline size_t size() const { + if (is_local_) return local_size_; + return data_.rbmap->size(); + } + inline void clear() { + if (!is_local_) delete data_.rbmap; + local_size_ = 0; + } + inline bool empty() const { + return size() == 0; + } + inline FastSparseVector& operator+=(const FastSparseVector& other) { + if (!is_local_) { + } else if (!other.is_local_) { + } else { // both local + } + return *this; + } + private: + inline T& get_or_create_bin(int k) { + if (is_local_) { + for (int i = 0; i < local_size_; ++i) + if (data_.local[i].first == k) return data_.local[i].second; + } else { + return (*data_.rbmap)[k]; + } + assert(is_local_); + // currently local! + if (local_size_ < LOCAL_MAX) { + PairIntFloat<T>& p = data_.local[local_size_]; + ++local_size_; + p.first = k; + return p.second; + } else { + swap_local_rbmap(); + return (*data_.rbmap)[k]; + } + } + void swap_local_rbmap() { + if (is_local_) { // data is local, move to rbmap + std::map<int, T>* m = new std::map<int, T>(&data_.local[0], &data_.local[local_size_]); + data_.rbmap = m; + is_local_ = false; + } else { // data is in rbmap, move to local + assert(data_.rbmap->size() < LOCAL_MAX); + const std::map<int, T>* m = data_.rbmap; + local_size_ = m->size(); + int i = 0; + for (typename std::map<int, T>::const_iterator it = m->begin(); + it != m->end(); ++it) { + data_.local[i] = *it; + ++i; + } + is_local_ = true; + } + } + + union { + PairIntFloat<T> local[LOCAL_MAX]; + std::map<int, T>* rbmap; + } data_; + unsigned char local_size_; + bool is_local_; +}; + +namespace performance_checks { + // if you get a failure on the next line, you should adjust LOCAL_MAX for + // your architecture + BOOST_STATIC_ASSERT(sizeof(FastSparseVector<float>) == L2_CACHE_LINE); +}; + +#endif diff --git a/utils/ts.cc b/utils/ts.cc new file mode 100644 index 00000000..1febed3c --- /dev/null +++ b/utils/ts.cc @@ -0,0 +1,36 @@ +#include <iostream> +#include <map> +#include <cassert> + +#include <boost/static_assert.hpp> + +#include "prob.h" +#include "sparse_vector.h" +#include "fast_sparse_vector.h" + +using namespace std; + +int main() { + cerr << sizeof(prob_t) << " " << sizeof(LogVal<float>) << endl; + cerr << " sizeof(FSV<float>) = " << sizeof(FastSparseVector<float>) << endl; + cerr << "sizeof(FSV<double>) = " << sizeof(FastSparseVector<double>) << endl; + sranddev(); + int c = 0; + for (int i = 0; i < 1000000; ++i) { + FastSparseVector<float> x; + //SparseVector<float> x; + for (int j = 0; j < 15; ++j) { + const int k = rand() % 1000; + const float v = rand() / 3.14f; + x.set_value(k,v); + } + //SparseVector<float> y = x; + FastSparseVector<float> y = x; + y += x; + y = x; + if (y.value(50)) { c++; } + } + cerr << "Counted " << c << " times\n"; + return 0; +} + |