From 282bc08647eb6856f798da0c64dd67530e02842a Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 21 Apr 2011 00:52:17 -0400 Subject: beginning of fast sparse vector impl --- utils/Makefile.am | 9 ++- utils/fast_sparse_vector.h | 135 +++++++++++++++++++++++++++++++++++++++++++++ utils/ts.cc | 36 ++++++++++++ 3 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 utils/fast_sparse_vector.h create mode 100644 utils/ts.cc diff --git a/utils/Makefile.am b/utils/Makefile.am index 9556f507..94f9be30 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,12 +1,14 @@ +noinst_PROGRAMS = ts +TESTS = ts + if HAVE_GTEST -noinst_PROGRAMS = \ +noinst_PROGRAMS += \ dict_test \ weights_test \ logval_test \ small_vector_test -TESTS = small_vector_test logval_test weights_test dict_test - +TESTS += small_vector_test logval_test weights_test dict_test endif noinst_LIBRARIES = libutils.a @@ -25,6 +27,7 @@ libutils_a_SOURCES = \ verbose.cc \ weights.cc +ts_SOURCES = ts.cc dict_test_SOURCES = dict_test.cc dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) weights_test_SOURCES = weights_test.cc diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h new file mode 100644 index 00000000..91bf441d --- /dev/null +++ b/utils/fast_sparse_vector.h @@ -0,0 +1,135 @@ +#ifndef _FAST_SPARSE_VECTOR_H_ +#define _FAST_SPARSE_VECTOR_H_ + +#include +#include + +#include + +// these are architecture dependent, they should be +// detected in some way but it's probably easiest (for me) +// to just set them +#define L2_CACHE_LINE 128 + +// this should just be a typedef to pair on the new c++ +template +struct PairIntFloat { + int first; + T second; + const PairIntFloat& operator=(const std::pair& v) { + first = v.first; + second = v.second; + return *this; + } + operator const std::pair&() const { + return *reinterpret_cast*>(this); + } +}; +BOOST_STATIC_ASSERT(sizeof(PairIntFloat) == sizeof(std::pair)); + +template +class FastSparseVector { + public: + FastSparseVector() : local_size_(0), is_local_(true) {} + ~FastSparseVector() { + if (!is_local_) delete data_.rbmap; + } + FastSparseVector(const FastSparseVector& other) { + memcpy(this, &other, sizeof(FastSparseVector)); + if (is_local_) return; + data_.rbmap = new std::map(*data_.rbmap); + } + const FastSparseVector& operator=(const FastSparseVector& other) { + if (!is_local_) delete data_.rbmap; + memcpy(this, &other, sizeof(FastSparseVector)); + if (is_local_) return *this; + data_.rbmap = new std::map(*data_.rbmap); + return *this; + } + inline void set_value(int k, const T& v) { + get_or_create_bin(k) = v; + } + inline T value(int k) const { + if (is_local_) { + for (int i = 0; i < local_size_; ++i) { + const PairIntFloat& p = data_.local[i]; + if (p.first == k) return p.second; + } + } else { + std::map::const_iterator it = data_.rbmap->find(k); + if (it != data_.rbmap->end()) return it->second; + } + return T(); + } + inline size_t size() const { + if (is_local_) return local_size_; + return data_.rbmap->size(); + } + inline void clear() { + if (!is_local_) delete data_.rbmap; + local_size_ = 0; + } + inline bool empty() const { + return size() == 0; + } + inline FastSparseVector& operator+=(const FastSparseVector& other) { + if (!is_local_) { + } else if (!other.is_local_) { + } else { // both local + } + return *this; + } + private: + inline T& get_or_create_bin(int k) { + if (is_local_) { + for (int i = 0; i < local_size_; ++i) + if (data_.local[i].first == k) return data_.local[i].second; + } else { + return (*data_.rbmap)[k]; + } + assert(is_local_); + // currently local! + if (local_size_ < LOCAL_MAX) { + PairIntFloat& p = data_.local[local_size_]; + ++local_size_; + p.first = k; + return p.second; + } else { + swap_local_rbmap(); + return (*data_.rbmap)[k]; + } + } + void swap_local_rbmap() { + if (is_local_) { // data is local, move to rbmap + std::map* m = new std::map(&data_.local[0], &data_.local[local_size_]); + data_.rbmap = m; + is_local_ = false; + } else { // data is in rbmap, move to local + assert(data_.rbmap->size() < LOCAL_MAX); + const std::map* m = data_.rbmap; + local_size_ = m->size(); + int i = 0; + for (typename std::map::const_iterator it = m->begin(); + it != m->end(); ++it) { + data_.local[i] = *it; + ++i; + } + is_local_ = true; + } + } + + union { + PairIntFloat local[LOCAL_MAX]; + std::map* rbmap; + } data_; + unsigned char local_size_; + bool is_local_; +}; + +namespace performance_checks { + // if you get a failure on the next line, you should adjust LOCAL_MAX for + // your architecture + BOOST_STATIC_ASSERT(sizeof(FastSparseVector) == L2_CACHE_LINE); +}; + +#endif diff --git a/utils/ts.cc b/utils/ts.cc new file mode 100644 index 00000000..1febed3c --- /dev/null +++ b/utils/ts.cc @@ -0,0 +1,36 @@ +#include +#include +#include + +#include + +#include "prob.h" +#include "sparse_vector.h" +#include "fast_sparse_vector.h" + +using namespace std; + +int main() { + cerr << sizeof(prob_t) << " " << sizeof(LogVal) << endl; + cerr << " sizeof(FSV) = " << sizeof(FastSparseVector) << endl; + cerr << "sizeof(FSV) = " << sizeof(FastSparseVector) << endl; + sranddev(); + int c = 0; + for (int i = 0; i < 1000000; ++i) { + FastSparseVector x; + //SparseVector x; + for (int j = 0; j < 15; ++j) { + const int k = rand() % 1000; + const float v = rand() / 3.14f; + x.set_value(k,v); + } + //SparseVector y = x; + FastSparseVector y = x; + y += x; + y = x; + if (y.value(50)) { c++; } + } + cerr << "Counted " << c << " times\n"; + return 0; +} + -- cgit v1.2.3