optional google dense_hash_map, srilm vocab (TDICT) as static - may help valgrind warning?

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@339 ec762483-ff6d-05da-a07a-a48fb63a330f
author: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 19:43:31 +0000
committer: graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-20 19:43:31 +0000
commit: 2621d66cc3de03395ab0c7a6f85113ed45189641 (patch)
tree: 95d57d0f88fc7e5c9fdbd9c3f2a1117dda1aa4ae
parent: d0063ea4ae7dbad8a9148ca3ebf6855cd2aa9af6 (diff)
7 files changed, 37 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac
index 5f45cdeb..54d0c6bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -17,6 +17,9 @@ LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_REGEX_LIBS $BOOST_THREAD_LIBS"
 AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp,
                [AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])])
 
+AC_CHECK_HEADER(google/dense_hash_map,
+               [AC_DEFINE([HAVE_SPARSEHASH], [], [flag for google::dense_hash_map])])
+
 AC_PROG_INSTALL
 GTEST_LIB_CHECK
 
diff --git a/decoder/dict.h b/decoder/dict.h
index 38231db6..82e8c0c0 100644
--- a/decoder/dict.h
+++ b/decoder/dict.h
@@ -3,7 +3,11 @@
 
 #include <cassert>
 #include <cstring>
-#include <tr1/unordered_map>
+#ifdef HAVE_SPARSEHASH
+# include <google/dense_hash_map>
+#else
+# include <tr1/unordered_map>
+#endif
 #include <string>
 #include <vector>
 
@@ -12,10 +16,22 @@
 #include "wordid.h"
 
 class Dict {
- typedef std::tr1::unordered_map<std::string, WordID, boost::hash<std::string> > Map;
+ typedef  // hash-map backend picked at configure time; must agree with the #include selection above
+#ifdef HAVE_SPARSEHASH
+ google::dense_hash_map  // needs set_empty_key()/set_deleted_key() before use (see Dict())
+#else
+ std::tr1::unordered_map
+#endif
+ <std::string, WordID, boost::hash<std::string> > Map;
 
  public:
-  Dict() : b0_("<bad0>") { words_.reserve(1000); }
+  Dict() : b0_("<bad0>") {
+#ifdef HAVE_SPARSEHASH
+    d_.set_empty_key("<bad1>");
+    d_.set_deleted_key("<bad2>");
+#endif
+    words_.reserve(1000);
+  }
 
   inline int max() const { return words_.size(); }
 
@@ -32,7 +48,7 @@ class Dict {
     }
   }
 
-  inline WordID Convert(const std::vector<std::string>& words, bool frozen = false) 
+  inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)
   { return Convert(toString(words), frozen); }
 
   static inline std::string toString(const std::vector<std::string>& words) {
diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc
index 19564bd0..12c29d32 100644
--- a/decoder/ff_bleu.cc
+++ b/decoder/ff_bleu.cc
@@ -25,7 +25,7 @@ using namespace std;
 class BLEUModelImpl {
  public:
   explicit BLEUModelImpl(int order) :
-      ngram_(*TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1),
+      ngram_(TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1),
       floor_(-100.0),
       kSTART(TD::Convert("<s>")),
       kSTOP(TD::Convert("</s>")),
diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc
index 658603e4..f267f8e8 100644
--- a/decoder/ff_csplit.cc
+++ b/decoder/ff_csplit.cc
@@ -149,7 +149,7 @@ void BasicCSplitFeatures::TraversalFeaturesImpl(
 struct ReverseCharLMCSplitFeatureImpl {
   ReverseCharLMCSplitFeatureImpl(const string& param) :
       order_(5),
-      vocab_(*TD::dict_),
+      vocab_(TD::dict_),
       ngram_(vocab_, order_) {
     kBOS = vocab_.getIndex("<s>");
     kEOS = vocab_.getIndex("</s>");
diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc
index bbf63338..2f0277c8 100644
--- a/decoder/ff_lm.cc
+++ b/decoder/ff_lm.cc
@@ -192,7 +192,7 @@ class LanguageModelImpl {
   }
 
  public:
-  explicit LanguageModelImpl(int order) : ngram_(*TD::dict_, order)
+  explicit LanguageModelImpl(int order) : ngram_(TD::dict_, order)
   {
     init(order);
   }
@@ -200,7 +200,7 @@ class LanguageModelImpl {
 
 //TODO: show that unigram special case (0 state) computes what it should.
   LanguageModelImpl(int order, const string& f, int load_order=0) :
-    ngram_(*TD::dict_, load_order ? load_order : order)
+    ngram_(TD::dict_, load_order ? load_order : order)
   {
     init(order);
     File file(f.c_str(), "r", 0);
diff --git a/decoder/tdict.cc b/decoder/tdict.cc
index ac590bd8..93f7b0eb 100644
--- a/decoder/tdict.cc
+++ b/decoder/tdict.cc
@@ -5,23 +5,25 @@
 
 using namespace std;
 
-Vocab* TD::dict_ = new Vocab;
-
-static const string empty;
-static const string space = " ";
+//FIXME: valgrind errors (static init order?)
+Vocab TD::dict_;
 
 unsigned int TD::NumWords() {
-  return dict_->numWords();
+  return dict_.numWords();
 }
 
 WordID TD::Convert(const std::string& s) {
-  return dict_->addWord((VocabString)s.c_str());
+  return dict_.addWord((VocabString)s.c_str());
 }
 
 const char* TD::Convert(const WordID& w) {
-  return dict_->getWord((VocabIndex)w);
+  return dict_.getWord((VocabIndex)w);
 }
 
+static const string empty;
+static const string space = " ";
+
+
 void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) {
   ids->clear();
   for (vector<string>::const_iterator i = strings.begin(); i != strings.end(); ++i)
diff --git a/decoder/tdict.h b/decoder/tdict.h
index 1fba5179..af1612ba 100644
--- a/decoder/tdict.h
+++ b/decoder/tdict.h
@@ -8,7 +8,7 @@
 class Vocab;
 
 struct TD {
-  static Vocab* dict_;
+  static Vocab dict_;
   static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids);
   static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);
   static std::string GetString(const std::vector<WordID>& str);
author	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-20 19:43:31 +0000
committer	graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f>	2010-07-20 19:43:31 +0000
commit	2621d66cc3de03395ab0c7a6f85113ed45189641 (patch)
tree	95d57d0f88fc7e5c9fdbd9c3f2a1117dda1aa4ae
parent	d0063ea4ae7dbad8a9148ca3ebf6855cd2aa9af6 (diff)