From 1b8181bf0d6e9137e6b9ccdbe414aec37377a1a9 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- training/dtrain/ksampler.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 training/dtrain/ksampler.h (limited to 'training/dtrain/ksampler.h') diff --git a/training/dtrain/ksampler.h b/training/dtrain/ksampler.h new file mode 100644 index 00000000..bc2f56cd --- /dev/null +++ b/training/dtrain/ksampler.h @@ -0,0 +1,61 @@ +#ifndef _DTRAIN_KSAMPLER_H_ +#define _DTRAIN_KSAMPLER_H_ + +#include "hg_sampler.h" // cdec +#include "kbestget.h" +#include "score.h" + +namespace dtrain +{ + +bool +cmp_hyp_by_model_d(ScoredHyp a, ScoredHyp b) +{ + return a.model > b.model; +} + +struct KSampler : public HypSampler +{ + const unsigned k_; + vector s_; + MT19937* prng_; + score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector); + unsigned src_len_; + + explicit KSampler(const unsigned k, MT19937* prng) : + k_(k), prng_(prng) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) + { + src_len_ = smeta.GetSourceLength(); + ScoredSamples(*hg); + } + + vector* GetSamples() { return &s_; } + + void ScoredSamples(const Hypergraph& forest) { + s_.clear(); sz_ = f_count_ = 0; + std::vector samples; + HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); + for (unsigned i = 0; i < k_; ++i) { + ScoredHyp h; + h.w = samples[i].words; + h.f = samples[i].fmap; + h.model = log(samples[i].model_score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + sz_++; + f_count_ += h.f.size(); + } + sort(s_.begin(), s_.end(), cmp_hyp_by_model_d); + for (unsigned i = 0; i < s_.size(); i++) s_[i].rank = i; + } +}; + + +} // namespace + +#endif + -- cgit v1.2.3