diff options
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | decoder/dict.h | 24 | ||||
-rw-r--r-- | decoder/ff_bleu.cc | 2 | ||||
-rw-r--r-- | decoder/ff_csplit.cc | 2 | ||||
-rw-r--r-- | decoder/ff_lm.cc | 4 | ||||
-rw-r--r-- | decoder/tdict.cc | 16 | ||||
-rw-r--r-- | decoder/tdict.h | 2 |
7 files changed, 37 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac index 5f45cdeb..54d0c6bf 100644 --- a/configure.ac +++ b/configure.ac @@ -17,6 +17,9 @@ LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS $BOOST_REGEX_LIBS $BOOST_THREAD_LIBS" AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp, [AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])]) +AC_CHECK_HEADER(google/dense_hash_map, + [AC_DEFINE([HAVE_SPARSEHASH], [], [flag for google::dense_hash_map])]) + AC_PROG_INSTALL GTEST_LIB_CHECK diff --git a/decoder/dict.h b/decoder/dict.h index 38231db6..82e8c0c0 100644 --- a/decoder/dict.h +++ b/decoder/dict.h @@ -3,7 +3,11 @@ #include <cassert> #include <cstring> -#include <tr1/unordered_map> +#ifdef HAVE_SPARSEHASH +# include <google/dense_hash_map> +#else +# include <tr1/unordered_map> +#endif #include <string> #include <vector> @@ -12,10 +16,22 @@ #include "wordid.h" class Dict { - typedef std::tr1::unordered_map<std::string, WordID, boost::hash<std::string> > Map; + typedef /* map backend; each branch must name the type whose header is included above under the same macro */ +#ifdef HAVE_SPARSEHASH + google::dense_hash_map +#else + std::tr1::unordered_map +#endif + <std::string, WordID, boost::hash<std::string> > Map; public: - Dict() : b0_("<bad0>") { words_.reserve(1000); } + Dict() : b0_("<bad0>") { +#ifdef HAVE_SPARSEHASH + d_.set_empty_key("<bad1>"); + d_.set_deleted_key("<bad2>"); +#endif + words_.reserve(1000); + } inline int max() const { return words_.size(); } @@ -32,7 +48,7 @@ class Dict { } } - inline WordID Convert(const std::vector<std::string>& words, bool frozen = false) + inline WordID Convert(const std::vector<std::string>& words, bool frozen = false) { return Convert(toString(words), frozen); } static inline std::string toString(const std::vector<std::string>& words) { diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index 19564bd0..12c29d32 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -25,7 +25,7 @@ using namespace std; class BLEUModelImpl { public: explicit BLEUModelImpl(int order) : - ngram_(*TD::dict_, order), buffer_(), 
order_(order), state_size_(OrderToStateSize(order) - 1), + ngram_(TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1), floor_(-100.0), kSTART(TD::Convert("<s>")), kSTOP(TD::Convert("</s>")), diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index 658603e4..f267f8e8 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -149,7 +149,7 @@ void BasicCSplitFeatures::TraversalFeaturesImpl( struct ReverseCharLMCSplitFeatureImpl { ReverseCharLMCSplitFeatureImpl(const string& param) : order_(5), - vocab_(*TD::dict_), + vocab_(TD::dict_), ngram_(vocab_, order_) { kBOS = vocab_.getIndex("<s>"); kEOS = vocab_.getIndex("</s>"); diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index bbf63338..2f0277c8 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -192,7 +192,7 @@ class LanguageModelImpl { } public: - explicit LanguageModelImpl(int order) : ngram_(*TD::dict_, order) + explicit LanguageModelImpl(int order) : ngram_(TD::dict_, order) { init(order); } @@ -200,7 +200,7 @@ class LanguageModelImpl { //TODO: show that unigram special case (0 state) computes what it should. LanguageModelImpl(int order, const string& f, int load_order=0) : - ngram_(*TD::dict_, load_order ? load_order : order) + ngram_(TD::dict_, load_order ? load_order : order) { init(order); File file(f.c_str(), "r", 0); diff --git a/decoder/tdict.cc b/decoder/tdict.cc index ac590bd8..93f7b0eb 100644 --- a/decoder/tdict.cc +++ b/decoder/tdict.cc @@ -5,23 +5,25 @@ using namespace std; -Vocab* TD::dict_ = new Vocab; - -static const string empty; -static const string space = " "; +//FIXME: valgrind errors (static init order?) 
+Vocab TD::dict_; unsigned int TD::NumWords() { - return dict_->numWords(); + return dict_.numWords(); } WordID TD::Convert(const std::string& s) { - return dict_->addWord((VocabString)s.c_str()); + return dict_.addWord((VocabString)s.c_str()); } const char* TD::Convert(const WordID& w) { - return dict_->getWord((VocabIndex)w); + return dict_.getWord((VocabIndex)w); } +static const string empty; +static const string space = " "; + + void TD::GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids) { ids->clear(); for (vector<string>::const_iterator i = strings.begin(); i != strings.end(); ++i) diff --git a/decoder/tdict.h b/decoder/tdict.h index 1fba5179..af1612ba 100644 --- a/decoder/tdict.h +++ b/decoder/tdict.h @@ -8,7 +8,7 @@ class Vocab; struct TD { - static Vocab* dict_; + static Vocab dict_; static void ConvertSentence(const std::string& sent, std::vector<WordID>* ids); static void GetWordIDs(const std::vector<std::string>& strings, std::vector<WordID>* ids); static std::string GetString(const std::vector<WordID>& str); |