summaryrefslogtreecommitdiff
path: root/dtrain/ksampler.h
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain/ksampler.h')
-rw-r--r--dtrain/ksampler.h52
1 files changed, 52 insertions, 0 deletions
diff --git a/dtrain/ksampler.h b/dtrain/ksampler.h
new file mode 100644
index 00000000..0783f98b
--- /dev/null
+++ b/dtrain/ksampler.h
@@ -0,0 +1,52 @@
+#ifndef _DTRAIN_KSAMPLER_H_
+#define _DTRAIN_KSAMPLER_H_
+
+#include "hg_sampler.h" // cdec
+#include "kbestget.h"
+#include "score.h"
+
+namespace dtrain
+{
+
+
+struct KSampler : public HypSampler
+{
+ const unsigned k_;
+ vector<ScoredHyp> s_;
+ MT19937* prng_;
+ score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>);
+ unsigned src_len_;
+
+ explicit KSampler(const unsigned k, MT19937* prng) :
+ k_(k), prng_(prng) {}
+
+ virtual void
+ NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
+ {
+ src_len_ = smeta.GetSourceLength();
+ ScoredSamples(*hg);
+ }
+
+ vector<ScoredHyp>* GetSamples() { return &s_; }
+
+ void ScoredSamples(const Hypergraph& forest) {
+ s_.clear();
+ std::vector<HypergraphSampler::Hypothesis> samples;
+ HypergraphSampler::sample_hypotheses(forest, k_, prng_, &samples);
+ for (unsigned i = 0; i < k_; ++i) {
+ ScoredHyp h;
+ h.w = samples[i].words;
+ h.f = samples[i].fmap;
+ h.model = log(samples[i].model_score);
+ h.rank = i;
+ h.score = scorer_->Score(h.w, *ref_, i, src_len_);
+ s_.push_back(h);
+ }
+ }
+};
+
+
+} // namespace
+
+#endif
+