summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-06-18 23:37:22 -0400
committerChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-06-18 23:37:22 -0400
commit78cc819168b2a550e52e9cac06dbbed41a3b04b2 (patch)
treef2645ac7928ea88d470c273333fe468a8dadbcb7
parentc4a8ec7677eaf72c71d8409475ed0f2ca9f9b14d (diff)
switch to hash maps for sparse vectors
-rw-r--r--utils/dict.h3
-rw-r--r--utils/fast_sparse_vector.h30
-rw-r--r--utils/hash.h7
3 files changed, 23 insertions, 17 deletions
diff --git a/utils/dict.h b/utils/dict.h
index 75ea3def..f08d0cf4 100644
--- a/utils/dict.h
+++ b/utils/dict.h
@@ -12,7 +12,8 @@
class Dict {
typedef
- HASH_MAP<std::string, WordID, boost::hash<std::string> > Map;
+ //HASH_MAP<std::string, WordID, boost::hash<std::string> > Map;
+ HASH_MAP<std::string, WordID> Map;
public:
Dict() : b0_("<bad0>") {
HASH_MAP_EMPTY(d_,"<bad1>");
diff --git a/utils/fast_sparse_vector.h b/utils/fast_sparse_vector.h
index 6e5dfb14..433a5cc5 100644
--- a/utils/fast_sparse_vector.h
+++ b/utils/fast_sparse_vector.h
@@ -88,7 +88,7 @@ class FastSparseVector {
}
const bool local_;
PairIntT<T>* local_it_;
- typename std::map<unsigned, T>::iterator remote_it_;
+ typename SPARSE_HASH_MAP<unsigned, T>::iterator remote_it_;
std::pair<const unsigned, T>& operator*() const {
if (local_)
return *reinterpret_cast<std::pair<const unsigned, T>*>(local_it_);
@@ -142,7 +142,7 @@ class FastSparseVector {
}
const bool local_;
const PairIntT<T>* local_it_;
- typename std::map<unsigned, T>::const_iterator remote_it_;
+ typename SPARSE_HASH_MAP<unsigned, T>::const_iterator remote_it_;
const std::pair<const unsigned, T>& operator*() const {
if (local_)
return *reinterpret_cast<const std::pair<const unsigned, T>*>(local_it_);
@@ -181,7 +181,7 @@ class FastSparseVector {
}
FastSparseVector(const FastSparseVector& other) {
std::memcpy(this, &other, sizeof(FastSparseVector));
- if (is_remote_) data_.rbmap = new std::map<unsigned, T>(*data_.rbmap);
+ if (is_remote_) data_.rbmap = new SPARSE_HASH_MAP<unsigned, T>(*data_.rbmap);
}
FastSparseVector(std::pair<unsigned, T>* first, std::pair<unsigned, T>* last) {
const ptrdiff_t n = last - first;
@@ -191,7 +191,7 @@ class FastSparseVector {
std::memcpy(data_.local, first, sizeof(std::pair<unsigned, T>) * n);
} else {
is_remote_ = true;
- data_.rbmap = new std::map<unsigned, T>(first, last);
+ data_.rbmap = new SPARSE_HASH_MAP<unsigned, T>(first, last);
}
}
void erase(int k) {
@@ -213,7 +213,7 @@ class FastSparseVector {
clear();
std::memcpy(this, &other, sizeof(FastSparseVector));
if (is_remote_)
- data_.rbmap = new std::map<unsigned, T>(*data_.rbmap);
+ data_.rbmap = new SPARSE_HASH_MAP<unsigned, T>(*data_.rbmap);
return *this;
}
T const& get_singleton() const {
@@ -237,7 +237,7 @@ class FastSparseVector {
}
inline T value(unsigned k) const {
if (is_remote_) {
- typename std::map<unsigned, T>::const_iterator it = data_.rbmap->find(k);
+ typename SPARSE_HASH_MAP<unsigned, T>::const_iterator it = data_.rbmap->find(k);
if (it != data_.rbmap->end()) return it->second;
} else {
for (unsigned i = 0; i < local_size_; ++i) {
@@ -322,8 +322,8 @@ class FastSparseVector {
}
inline FastSparseVector& operator*=(const T& scalar) {
if (is_remote_) {
- const typename std::map<unsigned, T>::iterator end = data_.rbmap->end();
- for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
+ const typename SPARSE_HASH_MAP<unsigned, T>::iterator end = data_.rbmap->end();
+ for (typename SPARSE_HASH_MAP<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
it->second *= scalar;
} else {
for (int i = 0; i < local_size_; ++i)
@@ -333,8 +333,8 @@ class FastSparseVector {
}
inline FastSparseVector& operator/=(const T& scalar) {
if (is_remote_) {
- const typename std::map<unsigned, T>::iterator end = data_.rbmap->end();
- for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
+ const typename SPARSE_HASH_MAP<unsigned, T>::iterator end = data_.rbmap->end();
+ for (typename SPARSE_HASH_MAP<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
it->second /= scalar;
} else {
for (int i = 0; i < local_size_; ++i)
@@ -431,17 +431,19 @@ class FastSparseVector {
void swap_local_rbmap() {
if (is_remote_) { // data is in rbmap, move to local
assert(data_.rbmap->size() < LOCAL_MAX);
- const std::map<unsigned, T>* m = data_.rbmap;
+ const SPARSE_HASH_MAP<unsigned, T>* m = data_.rbmap;
local_size_ = m->size();
int i = 0;
- for (typename std::map<unsigned, T>::const_iterator it = m->begin();
+ for (typename SPARSE_HASH_MAP<unsigned, T>::const_iterator it = m->begin();
it != m->end(); ++it) {
data_.local[i] = *it;
++i;
}
is_remote_ = false;
} else { // data is local, move to rbmap
- std::map<unsigned, T>* m = new std::map<unsigned, T>(&data_.local[0], &data_.local[local_size_]);
+ SPARSE_HASH_MAP<unsigned, T>* m = new SPARSE_HASH_MAP<unsigned, T>(
+ reinterpret_cast<std::pair<unsigned, T>*>(&data_.local[0]),
+ reinterpret_cast<std::pair<unsigned, T>*>(&data_.local[local_size_]), local_size_ * 1.5 + 1);
data_.rbmap = m;
is_remote_ = true;
}
@@ -449,7 +451,7 @@ class FastSparseVector {
union {
PairIntT<T> local[LOCAL_MAX];
- std::map<unsigned, T>* rbmap;
+ SPARSE_HASH_MAP<unsigned, T>* rbmap;
} data_;
unsigned char local_size_;
bool is_remote_;
diff --git a/utils/hash.h b/utils/hash.h
index 31457430..6d992086 100644
--- a/utils/hash.h
+++ b/utils/hash.h
@@ -10,8 +10,10 @@
#endif
#ifdef HAVE_SPARSEHASH
-# include <google/dense_hash_map>
-# include <google/dense_hash_set>
+# include <sparsehash/dense_hash_map>
+# include <sparsehash/dense_hash_set>
+# include <sparsehash/sparse_hash_map>
+# define SPARSE_HASH_MAP google::sparse_hash_map
# define HASH_MAP google::dense_hash_map
# define HASH_SET google::dense_hash_set
# define HASH_MAP_RESERVED(h,empty,deleted) do { h.set_empty_key(empty); h.set_deleted_key(deleted); } while(0)
@@ -19,6 +21,7 @@
#else
# include <tr1/unordered_map>
# include <tr1/unordered_set>
+# define SPARSE_HASH_MAP std::tr1::unordered_map
# define HASH_MAP std::tr1::unordered_map
# define HASH_SET std::tr1::unordered_set
# define HASH_MAP_RESERVED(h,empty,deleted)