summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 19:43:31 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 19:43:31 +0000
commit: 2621d66cc3de03395ab0c7a6f85113ed45189641 (patch)
tree: 95d57d0f88fc7e5c9fdbd9c3f2a1117dda1aa4ae
parent: d0063ea4ae7dbad8a9148ca3ebf6855cd2aa9af6 (diff)
optional google dense_hash_map, srilm vocab (TDICT) as static - may help valgrind warning?
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@339 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- configure.ac 3
-rw-r--r-- decoder/dict.h 24
-rw-r--r-- decoder/ff_bleu.cc 2
-rw-r--r-- decoder/ff_csplit.cc 2
-rw-r--r-- decoder/ff_lm.cc 4
-rw-r--r-- decoder/tdict.cc 16
-rw-r--r-- decoder/tdict.h 2
7 files changed, 37 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac
index 5f45cdeb..54d0c6bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -17,6 +17,9 @@ LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_REGEX_LIBS $BOOST_THREAD_LIBS"
AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp,
[AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])])
+AC_CHECK_HEADER(google/dense_hash_map,
+ [AC_DEFINE([HAVE_SPARSEHASH], [], [flag for google::dense_hash_map])])
+
AC_PROG_INSTALL
GTEST_LIB_CHECK
diff --git a/decoder/dict.h b/decoder/dict.h
index 38231db6..82e8c0c0 100644
--- a/decoder/dict.h
+++ b/decoder/dict.h
@@ -3,7 +3,11 @@
#include <cassert>
#include <cstring>
-#include <tr1/unordered_map>
+#ifdef HAVE_SPARSEHASH
+# include <google/dense_hash_map>
+#else
+# include <tr1/unordered_map>
+#endif
#include <string>
#include <vector>
@@ -12,10 +16,22 @@
#include "wordid.h"
class Dict {
- typedef std::tr1::unordered_map<std::string, WordID, boost::hash<std::string> > Map;
+ // Underlying string -> WordID hash map. Uses google::dense_hash_map when
+ // HAVE_SPARSEHASH is defined and std::tr1::unordered_map otherwise; the
+ // branch chosen here must match the header selected by the #include guard
+ // at the top of this file.
+ typedef
+#ifdef HAVE_SPARSEHASH
+ google::dense_hash_map
+#else
+ std::tr1::unordered_map
+#endif
+ <std::string, WordID, boost::hash<std::string> > Map;
public:
- Dict() : b0_("<bad0>") { words_.reserve(1000); }
+ Dict() : b0_("<bad0>") {
+#ifdef HAVE_SPARSEHASH
+ d_.set_empty_key("<bad1>");
+ d_.set_deleted_key("<bad2>");
+#endif
+ words_.reserve(1000);
+ }
inline int max() const { return words_.size(); }
@@ -32,7 +48,7 @@ class Dict {
}
}
- inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)
+ inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)
{ return Convert(toString(words), frozen); }
static inline std::string toString(const std::vector<std::string>& words) {
diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc
index 19564bd0..12c29d32 100644
--- a/decoder/ff_bleu.cc
+++ b/decoder/ff_bleu.cc
@@ -25,7 +25,7 @@ using namespace std;
class BLEUModelImpl {
public:
explicit BLEUModelImpl(int order) :
- ngram_(*TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1),
+ ngram_(TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1),
floor_(-100.0),
kSTART(TD::Convert("<s>")),
kSTOP(TD::Convert("</s>")),
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index 658603e4..f267f8e8 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -149,7 +149,7 @@ void BasicCSplitFeatures::TraversalFeaturesImpl(
struct ReverseCharLMCSplitFeatureImpl {
ReverseCharLMCSplitFeatureImpl(const string& param) :
order_(5),
- vocab_(*TD::dict_),
+ vocab_(TD::dict_),
ngram_(vocab_, order_) {
kBOS = vocab_.getIndex("<s>");
kEOS = vocab_.getIndex("</s>");
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index bbf63338..2f0277c8 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -192,7 +192,7 @@ class LanguageModelImpl {
}
public:
- explicit LanguageModelImpl(int order) : ngram_(*TD::dict_, order)
+ explicit LanguageModelImpl(int order) : ngram_(TD::dict_, order)
{
init(order);
}
@@ -200,7 +200,7 @@ class LanguageModelImpl {
//TODO: show that unigram special case (0 state) computes what it should.
LanguageModelImpl(int order, const string& f, int load_order=0) :
- ngram_(*TD::dict_, load_order ? load_order : order)
+ ngram_(TD::dict_, load_order ? load_order : order)
{
init(order);
File file(f.c_str(), "r", 0);
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index ac590bd8..93f7b0eb 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -5,23 +5,25 @@
using namespace std;
-Vocab* TD::dict_ = new Vocab;
-
-static const string empty;
-static const string space = " ";
+//FIXME: valgrind errors (static init order?)
+Vocab TD::dict_;
unsigned int TD::NumWords() {
- return dict_->numWords();
+ return dict_.numWords();
}
WordID TD::Convert(const std::string& s) {
- return dict_->addWord((VocabString)s.c_str());
+ return dict_.addWord((VocabString)s.c_str());
}
const char* TD::Convert(const WordID& w) {
- return dict_->getWord((VocabIndex)w);
+ return dict_.getWord((VocabIndex)w);
}
+static const string empty;
+static const string space = " ";
+
+
void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) {
ids->clear();
for (vector<string>::const_iterator i = strings.begin(); i != strings.end(); ++i)
diff --git a/decoder/tdict.h b/decoder/tdict.h
index 1fba5179..af1612ba 100644
--- a/decoder/tdict.h
+++ b/decoder/tdict.h
@@ -8,7 +8,7 @@
class Vocab;
struct TD {
- static Vocab* dict_;
+ static Vocab dict_;
static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids);
static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
static std::string GetString(const std::vector<WordID>& str);