From f355bdd04a335fffa089dd3d8095461ec0667f78 Mon Sep 17 00:00:00 2001 From: graehl Date: Tue, 20 Jul 2010 19:43:31 +0000 Subject: optional google dense_hash_map, srim vocab (TDICT) as static - may help valgrind warning? git-svn-id: https://ws10smt.googlecode.com/svn/trunk@339 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/dict.h | 24 ++++++++++++++++++++---- decoder/ff_bleu.cc | 2 +- decoder/ff_csplit.cc | 2 +- decoder/ff_lm.cc | 4 ++-- decoder/tdict.cc | 16 +++++++++------- decoder/tdict.h | 2 +- 6 files changed, 34 insertions(+), 16 deletions(-) (limited to 'decoder') diff --git a/decoder/dict.h b/decoder/dict.h index 38231db6..82e8c0c0 100644 --- a/decoder/dict.h +++ b/decoder/dict.h @@ -3,7 +3,11 @@ #include #include -#include +#ifdef HAVE_SPARSEHASH +# include +#else +# include +#endif #include #include @@ -12,10 +16,22 @@ #include "wordid.h" class Dict { - typedef std::tr1::unordered_map > Map; + typedef +#ifdef HAVE_SPARSEHASH + std::tr1::unordered_map +#else + google::dense_hash_map +#endif + > Map; public: - Dict() : b0_("") { words_.reserve(1000); } + Dict() : b0_("") { +#ifdef HAVE_SPARSEHASH + d_.set_empty_key(""); + d_.set_deleted_key(""); +#endif + words_.reserve(1000); + } inline int max() const { return words_.size(); } @@ -32,7 +48,7 @@ class Dict { } } - inline WordID Convert(const std::vector& words, bool frozen = false) + inline WordID Convert(const std::vector& words, bool frozen = false) { return Convert(toString(words), frozen); } static inline std::string toString(const std::vector& words) { diff --git a/decoder/ff_bleu.cc b/decoder/ff_bleu.cc index 19564bd0..12c29d32 100644 --- a/decoder/ff_bleu.cc +++ b/decoder/ff_bleu.cc @@ -25,7 +25,7 @@ using namespace std; class BLEUModelImpl { public: explicit BLEUModelImpl(int order) : - ngram_(*TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1), + ngram_(TD::dict_, order), buffer_(), order_(order), state_size_(OrderToStateSize(order) - 1), floor_(-100.0), kSTART(TD::Convert("")), kSTOP(TD::Convert("")), diff --git a/decoder/ff_csplit.cc b/decoder/ff_csplit.cc index 658603e4..f267f8e8 100644 --- a/decoder/ff_csplit.cc +++ b/decoder/ff_csplit.cc @@ -149,7 +149,7 @@ void BasicCSplitFeatures::TraversalFeaturesImpl( struct ReverseCharLMCSplitFeatureImpl { ReverseCharLMCSplitFeatureImpl(const string& param) : order_(5), - vocab_(*TD::dict_), + vocab_(TD::dict_), ngram_(vocab_, order_) { kBOS = vocab_.getIndex(""); kEOS = vocab_.getIndex(""); diff --git a/decoder/ff_lm.cc b/decoder/ff_lm.cc index bbf63338..2f0277c8 100644 --- a/decoder/ff_lm.cc +++ b/decoder/ff_lm.cc @@ -192,7 +192,7 @@ class LanguageModelImpl { } public: - explicit LanguageModelImpl(int order) : ngram_(*TD::dict_, order) + explicit LanguageModelImpl(int order) : ngram_(TD::dict_, order) { init(order); } @@ -200,7 +200,7 @@ class LanguageModelImpl { //TODO: show that unigram special case (0 state) computes what it should. LanguageModelImpl(int order, const string& f, int load_order=0) : - ngram_(*TD::dict_, load_order ? load_order : order) + ngram_(TD::dict_, load_order ? load_order : order) { init(order); File file(f.c_str(), "r", 0); diff --git a/decoder/tdict.cc b/decoder/tdict.cc index ac590bd8..93f7b0eb 100644 --- a/decoder/tdict.cc +++ b/decoder/tdict.cc @@ -5,23 +5,25 @@ using namespace std; -Vocab* TD::dict_ = new Vocab; - -static const string empty; -static const string space = " "; +//FIXME: valgrind errors (static init order?) +Vocab TD::dict_; unsigned int TD::NumWords() { - return dict_->numWords(); + return dict_.numWords(); } WordID TD::Convert(const std::string& s) { - return dict_->addWord((VocabString)s.c_str()); + return dict_.addWord((VocabString)s.c_str()); } const char* TD::Convert(const WordID& w) { - return dict_->getWord((VocabIndex)w); + return dict_.getWord((VocabIndex)w); } +static const string empty; +static const string space = " "; + + void TD::GetWordIDs(const std::vector& strings, std::vector* ids) { ids->clear(); for (vector::const_iterator i = strings.begin(); i != strings.end(); ++i) diff --git a/decoder/tdict.h b/decoder/tdict.h index 1fba5179..af1612ba 100644 --- a/decoder/tdict.h +++ b/decoder/tdict.h @@ -8,7 +8,7 @@ class Vocab; struct TD { - static Vocab* dict_; + static Vocab dict_; static void ConvertSentence(const std::string& sent, std::vector* ids); static void GetWordIDs(const std::vector& strings, std::vector* ids); static std::string GetString(const std::vector& str); -- cgit v1.2.3