diff options
Diffstat (limited to 'dtrain/ksampler.h')
-rw-r--r-- | dtrain/ksampler.h | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h new file mode 100644 index 00000000..0783f98b --- /dev/null +++ b/dtrain/ksampler.h @@ -0,0 +1,52 @@ +#ifndef _DTRAIN_KSAMPLER_H_ +#define _DTRAIN_KSAMPLER_H_ + +#include "hg_sampler.h" // cdec +#include "kbestget.h" +#include "score.h" + +namespace dtrain +{ + + +struct KSampler : public HypSampler +{ + const unsigned k_; + vector<ScoredHyp> s_; + MT19937* prng_; + score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>); + unsigned src_len_; + + explicit KSampler(const unsigned k, MT19937* prng) : + k_(k), prng_(prng) {} + + virtual void + NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg) + { + src_len_ = smeta.GetSourceLength(); + ScoredSamples(*hg); + } + + vector<ScoredHyp>* GetSamples() { return &s_; } + + void ScoredSamples(const Hypergraph& forest) { + s_.clear(); + std::vector<HypergraphSampler::Hypothesis> samples; + HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples); + for (unsigned i = 0; i < k_; ++i) { + ScoredHyp h; + h.w = samples[i].words; + h.f = samples[i].fmap; + h.model = log(samples[i].model_score); + h.rank = i; + h.score = scorer_->Score(h.w, *ref_, i, src_len_); + s_.push_back(h); + } + } +}; + + +} // namespace + +#endif + |