| author    | Kenneth Heafield <kenlm@kheafield.com> | 2011-09-23 16:38:01 -0400 |
| --------- | -------------------------------------- | ------------------------- |
| committer | Kenneth Heafield <kenlm@kheafield.com> | 2011-09-23 16:38:01 -0400 |
| commit    | d71c74f3924e6c207f3ebfab470b9a30e2551dde | |
| tree      | 1ae0c7a04b036d4724497deab09203316004a459 /decoder | |
| parent    | fcbb924a575df56de53eacce886ebf9ccf3283ed | |
Go through ff_klm and try to fix it for the new version.
Diffstat (limited to 'decoder')
| -rw-r--r-- | decoder/ff_klm.cc | 36 |

1 file changed, 9 insertions, 27 deletions
diff --git a/decoder/ff_klm.cc b/decoder/ff_klm.cc
index 016aad26..3b2113ad 100644
--- a/decoder/ff_klm.cc
+++ b/decoder/ff_klm.cc
@@ -90,19 +90,12 @@ class KLanguageModelImpl {
     return *static_cast<const lm::ngram::ChartState*>(state);
   }
 
-  inline void SetRemnantLMState(const lm::ngram::ChartState& lmstate, void* state) const {
-    // if we were clever, we could use the memory pointed to by state to do all
-    // the work, avoiding this copy
-    memcpy(state, &lmstate, ngram_->StateSize());
-  }
-
  public:
   double LookupWords(const TRule& rule, const vector<const void*>& ant_states, double* oovs, void* remnant) {
-    double sum = 0.0;
     if (oovs) *oovs = 0;
     const vector<WordID>& e = rule.e();
     lm::ngram::ChartState state;
-    lm::ngram::RuleScore<Model> ruleScore(*ngram_, state);
+    lm::ngram::RuleScore<Model> ruleScore(*ngram_, remnant ? *static_cast<lm::ngram::ChartState*>(remnant) : state);
     unsigned i = 0;
     if (e.size()) {
       if (e[i] == kCDEC_SOS) {
@@ -123,12 +116,13 @@ class KLanguageModelImpl {
         // maybe handle emission
         const lm::WordIndex cur_word = MapWord(cdec_word_or_class); // map to LM's id
         const bool is_oov = (cur_word == 0);
-        if (is_oov) (*oovs) += 1.0;
+        if (is_oov && oovs) (*oovs) += 1.0;
         ruleScore.Terminal(cur_word);
       }
     }
-    if (remnant) SetRemnantLMState(state, remnant);
-    return ruleScore.Finish();
+    double ret = ruleScore.Finish();
+    state.ZeroRemaining();
+    return ret;
   }
 
   // this assumes no target words on final unary -> goal rule. is that ok?
@@ -138,10 +132,9 @@ class KLanguageModelImpl {
       lm::ngram::ChartState cstate;
       lm::ngram::RuleScore<Model> ruleScore(*ngram_, cstate);
       ruleScore.BeginSentence();
-      SetRemnantLMState(cstate, dummy_state_);
-      dummy_ants_[1] = state;
-      *oovs = 0;
-      return LookupWords(*dummy_rule_, dummy_ants_, oovs, NULL);
+      ruleScore.NonTerminal(RemnantLMState(state), 0.0f);
+      ruleScore.Terminal(kEOS_);
+      return ruleScore.Finish();
     } else {  // rules DO produce <s> ... </s>
       double p = 0;
       cerr << "not implemented"; abort(); // TODO
@@ -187,14 +180,8 @@ class KLanguageModelImpl {
     }
     order_ = ngram_->Order();
     cerr << "Loaded " << order_ << "-gram KLM from " << filename << " (MapSize=" << cdec2klm_map_.size() << ")\n";
-    state_size_ = sizeof(lm::ngram::ChartState);
 
     // special handling of beginning / ending sentence markers
-    dummy_state_ = new char[state_size_];
-    memset(dummy_state_, 0, state_size_);
-    dummy_ants_.push_back(dummy_state_);
-    dummy_ants_.push_back(NULL);
-    dummy_rule_.reset(new TRule("[DUMMY] ||| [BOS] [DUMMY] ||| [1] [2] </s> ||| X=0"));
     kSOS_ = MapWord(kCDEC_SOS);
     assert(kSOS_ > 0);
     kEOS_ = MapWord(TD::Convert("</s>"));
@@ -243,10 +230,9 @@ class KLanguageModelImpl {
 
   ~KLanguageModelImpl() {
     delete ngram_;
-    delete[] dummy_state_;
   }
 
-  int ReserveStateSize() const { return state_size_; }
+  int ReserveStateSize() const { return sizeof(lm::ngram::ChartState); }
 
  private:
   const WordID kCDEC_UNK;
@@ -261,12 +247,8 @@ class KLanguageModelImpl {
   // the sentence) with 0, and anything else with -100
   int order_;
-  int state_size_;
-  char* dummy_state_;
-  vector<const void*> dummy_ants_;
   vector<lm::WordIndex> cdec2klm_map_;
   vector<WordID> word2class_map_; // if this is a class-based LM, this is the word->class mapping
-  TRulePtr dummy_rule_;
 };
 
 template <class Model>
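In summary, the patch drops the memcpy-based SetRemnantLMState helper in favor of KenLM's left-state API (lm/left.hh): LookupWords hands the decoder's remnant buffer straight to lm::ngram::RuleScore as the output ChartState, and FinalTraversalFeatures scores `<s> [antecedent] </s>` directly with BeginSentence/NonTerminal/Terminal instead of routing through the deleted dummy rule. The following is a minimal standalone sketch of that API, not part of the patch; the model path "test.arpa" and the tokens "the"/"cat" are placeholders.

```cpp
#include <iostream>

#include "lm/left.hh"
#include "lm/model.hh"

int main() {
  using namespace lm::ngram;
  // Placeholder path: any ARPA or KenLM binary model works here.
  ProbingModel model("test.arpa");
  const ProbingModel::Vocabulary &vocab = model.GetVocabulary();

  // Score a rule fragment; RuleScore fills in the output ChartState,
  // which is what LookupWords now does directly into the decoder's
  // remnant buffer instead of copying via the deleted SetRemnantLMState.
  ChartState frag;
  RuleScore<ProbingModel> scorer(model, frag);
  scorer.Terminal(vocab.Index("the"));
  scorer.Terminal(vocab.Index("cat"));
  float frag_score = scorer.Finish();  // log10 probability of the fragment

  // Goal-rule scoring as in the rewritten FinalTraversalFeatures:
  // <s> [fragment] </s>, charging 0.0f for the antecedent because its
  // own score was already added when the fragment was built.
  ChartState top;
  RuleScore<ProbingModel> goal(model, top);
  goal.BeginSentence();
  goal.NonTerminal(frag, 0.0f);
  goal.Terminal(vocab.Index("</s>"));
  std::cout << "fragment: " << frag_score
            << "  sentence: " << goal.Finish() << std::endl;
  return 0;
}
```

NonTerminal's second argument is the score already charged for that antecedent, and Finish() returns a log10 probability while finalizing the output state. ChartState::ZeroRemaining(), which LookupWords now calls before returning, zeroes the unused word slots so that equal states are bytewise identical; this matters because the decoder treats the remnant as an opaque byte array when comparing and hashing states.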