summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2011-04-21 00:52:17 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2011-04-21 00:52:17 -0400
commit282bc08647eb6856f798da0c64dd67530e02842a (patch)
tree97553e7e4208b9c0eb98e2168eb9faefc367ee66
parentaf0526d5994d641352bdba499d9a74f9e4824b4f (diff)
beginning of fast sparse vector impl
-rw-r--r--utils/Makefile.am9
-rw-r--r--utils/fast_sparse_vector.h135
-rw-r--r--utils/ts.cc36
3 files changed, 177 insertions, 3 deletions
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 9556f507..94f9be30 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -1,12 +1,14 @@
+noinst_PROGRAMS = ts
+TESTS = ts
+
if HAVE_GTEST
-noinst_PROGRAMS = \
+noinst_PROGRAMS += \
dict_test \
weights_test \
logval_test \
small_vector_test
-TESTS = small_vector_test logval_test weights_test dict_test
-
+TESTS += small_vector_test logval_test weights_test dict_test
endif
noinst_LIBRARIES = libutils.a
@@ -25,6 +27,7 @@ libutils_a_SOURCES = \
verbose.cc \
weights.cc
+ts_SOURCES = ts.cc
dict_test_SOURCES = dict_test.cc
dict_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS)
weights_test_SOURCES = weights_test.cc
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
new file mode 100644
index 00000000..91bf441d
--- /dev/null
+++ b/utils/fast_sparse_vector.h
@@ -0,0 +1,135 @@
+#ifndef _FAST_SPARSE_VECTOR_H_
+#define _FAST_SPARSE_VECTOR_H_
+
+#include <map>
+#include <cassert>
+
+#include <boost/static_assert.hpp>
+
+// these are architecture dependent, they should be
+// detected in some way but it's probably easiest (for me)
+// to just set them
+#define L2_CACHE_LINE 128
+
+// this should just be a typedef to pair<int,float> on the new c++
+template <typename T>
+struct PairIntFloat {
+ int first;
+ T second;
+ const PairIntFloat& operator=(const std::pair<const int, float>& v) {
+ first = v.first;
+ second = v.second;
+ return *this;
+ }
+ operator const std::pair<const int, float>&() const {
+ return *reinterpret_cast<const std::pair<const int, float>*>(this);
+ }
+};
+BOOST_STATIC_ASSERT(sizeof(PairIntFloat<float>) == sizeof(std::pair<int,float>));
+
+template <typename T, int LOCAL_MAX = (sizeof(T) == sizeof(float) ? 15 : 7)>
+class FastSparseVector {
+ public:
+ FastSparseVector() : local_size_(0), is_local_(true) {}
+ ~FastSparseVector() {
+ if (!is_local_) delete data_.rbmap;
+ }
+ FastSparseVector(const FastSparseVector& other) {
+ memcpy(this, &other, sizeof(FastSparseVector));
+ if (is_local_) return;
+ data_.rbmap = new std::map<int, T>(*data_.rbmap);
+ }
+ const FastSparseVector& operator=(const FastSparseVector& other) {
+ if (!is_local_) delete data_.rbmap;
+ memcpy(this, &other, sizeof(FastSparseVector));
+ if (is_local_) return *this;
+ data_.rbmap = new std::map<int, T>(*data_.rbmap);
+ return *this;
+ }
+ inline void set_value(int k, const T& v) {
+ get_or_create_bin(k) = v;
+ }
+ inline T value(int k) const {
+ if (is_local_) {
+ for (int i = 0; i < local_size_; ++i) {
+ const PairIntFloat<T>& p = data_.local[i];
+ if (p.first == k) return p.second;
+ }
+ } else {
+ std::map<int, float>::const_iterator it = data_.rbmap->find(k);
+ if (it != data_.rbmap->end()) return it->second;
+ }
+ return T();
+ }
+ inline size_t size() const {
+ if (is_local_) return local_size_;
+ return data_.rbmap->size();
+ }
+ inline void clear() {
+ if (!is_local_) delete data_.rbmap;
+ local_size_ = 0;
+ }
+ inline bool empty() const {
+ return size() == 0;
+ }
+ inline FastSparseVector& operator+=(const FastSparseVector& other) {
+ if (!is_local_) {
+ } else if (!other.is_local_) {
+ } else { // both local
+ }
+ return *this;
+ }
+ private:
+ inline T& get_or_create_bin(int k) {
+ if (is_local_) {
+ for (int i = 0; i < local_size_; ++i)
+ if (data_.local[i].first == k) return data_.local[i].second;
+ } else {
+ return (*data_.rbmap)[k];
+ }
+ assert(is_local_);
+ // currently local!
+ if (local_size_ < LOCAL_MAX) {
+ PairIntFloat<T>& p = data_.local[local_size_];
+ ++local_size_;
+ p.first = k;
+ return p.second;
+ } else {
+ swap_local_rbmap();
+ return (*data_.rbmap)[k];
+ }
+ }
+ void swap_local_rbmap() {
+ if (is_local_) { // data is local, move to rbmap
+ std::map<int, T>* m = new std::map<int, T>(&data_.local[0], &data_.local[local_size_]);
+ data_.rbmap = m;
+ is_local_ = false;
+ } else { // data is in rbmap, move to local
+ assert(data_.rbmap->size() < LOCAL_MAX);
+ const std::map<int, T>* m = data_.rbmap;
+ local_size_ = m->size();
+ int i = 0;
+ for (typename std::map<int, T>::const_iterator it = m->begin();
+ it != m->end(); ++it) {
+ data_.local[i] = *it;
+ ++i;
+ }
+ is_local_ = true;
+ }
+ }
+
+ union {
+ PairIntFloat<T> local[LOCAL_MAX];
+ std::map<int, T>* rbmap;
+ } data_;
+ unsigned char local_size_;
+ bool is_local_;
+};
+
+namespace performance_checks {
+ // if you get a failure on the next line, you should adjust LOCAL_MAX for
+ // your architecture
+ BOOST_STATIC_ASSERT(sizeof(FastSparseVector<float>) == L2_CACHE_LINE);
+};
+
+#endif
diff --git a/utils/ts.cc b/utils/ts.cc
new file mode 100644
index 00000000..1febed3c
--- /dev/null
+++ b/utils/ts.cc
@@ -0,0 +1,36 @@
+#include <iostream>
+#include <map>
+#include <cassert>
+
+#include <boost/static_assert.hpp>
+
+#include "prob.h"
+#include "sparse_vector.h"
+#include "fast_sparse_vector.h"
+
+using namespace std;
+
+int main() {
+ cerr << sizeof(prob_t) << " " << sizeof(LogVal<float>) << endl;
+ cerr << " sizeof(FSV<float>) = " << sizeof(FastSparseVector<float>) << endl;
+ cerr << "sizeof(FSV<double>) = " << sizeof(FastSparseVector<double>) << endl;
+ sranddev();
+ int c = 0;
+ for (int i = 0; i < 1000000; ++i) {
+ FastSparseVector<float> x;
+ //SparseVector<float> x;
+ for (int j = 0; j < 15; ++j) {
+ const int k = rand() % 1000;
+ const float v = rand() / 3.14f;
+ x.set_value(k,v);
+ }
+ //SparseVector<float> y = x;
+ FastSparseVector<float> y = x;
+ y += x;
+ y = x;
+ if (y.value(50)) { c++; }
+ }
+ cerr << "Counted " << c << " times\n";
+ return 0;
+}
+