diff options
| -rw-r--r-- | configure.ac | 3 | ||||
| -rw-r--r-- | decoder/dict.h | 24 | ||||
| -rw-r--r-- | decoder/ff_bleu.cc | 2 | ||||
| -rw-r--r-- | decoder/ff_csplit.cc | 2 | ||||
| -rw-r--r-- | decoder/ff_lm.cc | 4 | ||||
| -rw-r--r-- | decoder/tdict.cc | 16 | ||||
| -rw-r--r-- | decoder/tdict.h | 2 | 
7 files changed, 37 insertions, 16 deletions
| diff --git a/configure.ac b/configure.ac index 5f45cdeb..54d0c6bf 100644 --- a/configure.ac +++ b/configure.ac @@ -17,6 +17,9 @@ LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_REGEX_LIBS $BOOST_THREAD_LIBS"  AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp,                 [AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])]) +AC_CHECK_HEADER(google/dense_hash_map, +               [AC_DEFINE([HAVE_SPARSEHASH], [], [flag for google::dense_hash_map])]) +  AC_PROG_INSTALL  GTEST_LIB_CHECK diff --git a/decoder/dict.h b/decoder/dict.h index 38231db6..82e8c0c0 100644 --- a/decoder/dict.h +++ b/decoder/dict.h @@ -3,7 +3,11 @@  #include <cassert>  #include <cstring> -#include <tr1/unordered_map> +#ifdef HAVE_SPARSEHASH +# include <google/dense_hash_map> +#else +# include <tr1/unordered_map> +#endif  #include <string>  #include <vector> @@ -12,10 +16,22 @@  #include "wordid.h"  class Dict { - typedef std::tr1::unordered_map<std::string, WordID, boost::hash<std::string> > Map; + typedef +#ifdef HAVE_SPARSEHASH + std::tr1::unordered_map +#else + google::dense_hash_map +#endif + <std::string, WordID, boost::hash<std::string> > Map;   public: -  Dict() : b0_("<bad0>") { words_.reserve(1000); } +  Dict() : b0_("<bad0>") { +#ifdef HAVE_SPARSEHASH +    d_.set_empty_key("<bad1>"); +    d_.set_deleted_key("<bad2>"); +#endif +    words_.reserve(1000); +  }    inline int max() const { return words_.size(); } @@ -32,7 +48,7 @@ class Dict {      }    } -  inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)  +  inline WordID Convert(const std::vector<std::string>& words, bool frozen = false)    { return Convert(toString(words), frozen); }    static inline std::string toString(const std::vector<std::string>& words) { diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index 19564bd0..12c29d32 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -25,7 +25,7 @@ using namespace std;  class BLEUModelImpl {   public:    explicit BLEUModelImpl(int order) : -      ngram_(*TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1), +      ngram_(TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1),        floor_(-100.0),        kSTART(TD::Convert("<s>")),        kSTOP(TD::Convert("</s>")), diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index 658603e4..f267f8e8 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -149,7 +149,7 @@ void BasicCSplitFeatures::TraversalFeaturesImpl(  struct ReverseCharLMCSplitFeatureImpl {    ReverseCharLMCSplitFeatureImpl(const string& param) :        order_(5), -      vocab_(*TD::dict_), +      vocab_(TD::dict_),        ngram_(vocab_, order_) {      kBOS = vocab_.getIndex("<s>");      kEOS = vocab_.getIndex("</s>"); diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index bbf63338..2f0277c8 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -192,7 +192,7 @@ class LanguageModelImpl {    }   public: -  explicit LanguageModelImpl(int order) : ngram_(*TD::dict_, order) +  explicit LanguageModelImpl(int order) : ngram_(TD::dict_, order)    {      init(order);    } @@ -200,7 +200,7 @@ class LanguageModelImpl {  //TODO: show that unigram special case (0 state) computes what it should.    LanguageModelImpl(int order, const string& f, int load_order=0) : -    ngram_(*TD::dict_, load_order ? load_order : order) +    ngram_(TD::dict_, load_order ? load_order : order)    {      init(order);      File file(f.c_str(), "r", 0); diff --git a/decoder/tdict.cc b/decoder/tdict.cc index ac590bd8..93f7b0eb 100644 --- a/decoder/tdict.cc +++ b/decoder/tdict.cc @@ -5,23 +5,25 @@  using namespace std; -Vocab* TD::dict_ = new Vocab; - -static const string empty; -static const string space = " "; +//FIXME: valgrind errors (static init order?) +Vocab TD::dict_;  unsigned int TD::NumWords() { -  return dict_->numWords(); +  return dict_.numWords();  }  WordID TD::Convert(const std::string& s) { -  return dict_->addWord((VocabString)s.c_str()); +  return dict_.addWord((VocabString)s.c_str());  }  const char* TD::Convert(const WordID& w) { -  return dict_->getWord((VocabIndex)w); +  return dict_.getWord((VocabIndex)w);  } +static const string empty; +static const string space = " "; + +  void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) {    ids->clear();    for (vector<string>::const_iterator i = strings.begin(); i != strings.end(); ++i) diff --git a/decoder/tdict.h b/decoder/tdict.h index 1fba5179..af1612ba 100644 --- a/decoder/tdict.h +++ b/decoder/tdict.h @@ -8,7 +8,7 @@  class Vocab;  struct TD { -  static Vocab* dict_; +  static Vocab dict_;    static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids);    static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids);    static std::string GetString(const std::vector<WordID>& str); | 
