diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-11-05 15:29:46 +0100 |
commit | 6f29f345dc06c1a1033475eac1d1340781d1d603 (patch) | |
tree | 6fa4cdd7aefd7d54c9585c2c6274db61bb8b159a /decoder/hg_sampler.cc | |
parent | b510da2e562c695c90d565eb295c749569c59be8 (diff) | |
parent | c615c37501fa8576584a510a9d2bfe2fdd5bace7 (diff) |
merge upstream/master
Diffstat (limited to 'decoder/hg_sampler.cc')
-rw-r--r-- | decoder/hg_sampler.cc | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/decoder/hg_sampler.cc b/decoder/hg_sampler.cc index cdf0ec3c..8e520871 100644 --- a/decoder/hg_sampler.cc +++ b/decoder/hg_sampler.cc @@ -71,3 +71,58 @@ void HypergraphSampler::sample_hypotheses(const Hypergraph& hg, Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges)); } } + +void HypergraphSampler::sample_trees(const Hypergraph& hg, + unsigned n, + MT19937* rng, + vector<string>* trees) { + trees->clear(); + trees->resize(n); + + // compute inside probabilities + vector<prob_t> node_probs; + Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb()); + + vector<bool> sampled_edges(hg.edges_.size()); + queue<unsigned> q; + SampleSet<prob_t> ss; + for (unsigned i = 0; i < n; ++i) { + fill(sampled_edges.begin(), sampled_edges.end(), false); + // sample derivation top down + assert(q.empty()); + q.push(hg.nodes_.size() - 1); + prob_t model_score = prob_t::One(); + while(!q.empty()) { + unsigned cur_node_id = q.front(); + q.pop(); + const Hypergraph::Node& node = hg.nodes_[cur_node_id]; + const unsigned num_in_edges = node.in_edges_.size(); + unsigned sampled_edge_idx = 0; + if (num_in_edges == 1) { + sampled_edge_idx = node.in_edges_[0]; + } else { + assert(num_in_edges > 1); + ss.clear(); + for (unsigned j = 0; j < num_in_edges; ++j) { + const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]]; + prob_t p = edge.edge_prob_; // edge weight + for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k) + p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight + ss.add(p); + } + sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)]; + } + sampled_edges[sampled_edge_idx] = true; + const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx]; + model_score *= sampled_edge.edge_prob_; + //sampled_deriv->push_back(sampled_edge_idx); + for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) { + q.push(sampled_edge.tail_nodes_[j]); + } + } + vector<WordID> tmp; + Viterbi(hg, &tmp, ETreeTraversal(), SampledDerivationWeightFunction(sampled_edges)); + (*trees)[i] = TD::GetString(tmp); + } +} + |