summaryrefslogtreecommitdiff
path: root/dtrain/hgsampler.cc
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2011-11-13 16:17:02 +0100
committerPatrick Simianer <p@simianer.de>2011-11-13 16:17:02 +0100
commit853e07938ea3eaf0654911539bd5169a1b660220 (patch)
treeb8ed5de27564eb86862702ad65aac071d4cd3f5d /dtrain/hgsampler.cc
parenteffc9bfc40a0559ce36a155daa15e0dc53e93b75 (diff)
removed hgsampler, more stats, unit_weight_vector arg
Diffstat (limited to 'dtrain/hgsampler.cc')
-rw-r--r--dtrain/hgsampler.cc75
1 files changed, 0 insertions, 75 deletions
diff --git a/dtrain/hgsampler.cc b/dtrain/hgsampler.cc
deleted file mode 100644
index ad28b162..00000000
--- a/dtrain/hgsampler.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-// Chris Dyer
-#include "hgsampler.h"
-
-#include <queue>
-
-#include "viterbi.h"
-#include "inside_outside.h"
-
-using namespace std;
-
-struct SampledDerivationWeightFunction {
- typedef double Weight;
- explicit SampledDerivationWeightFunction(const vector<bool>& sampled) : sampled_edges(sampled) {}
- double operator()(const Hypergraph::Edge& e) const {
- return static_cast<double>(sampled_edges[e.id_]);
- }
- const vector<bool>& sampled_edges;
-};
-
-void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
- unsigned n,
- MT19937* rng,
- vector<Hypothesis>* hypos) {
- hypos->clear();
- hypos->resize(n);
-
- // compute inside probabilities
- vector<prob_t> node_probs;
- Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb());
-
- vector<bool> sampled_edges(hg.edges_.size());
- queue<unsigned> q;
- SampleSet<prob_t> ss;
- for (unsigned i = 0; i < n; ++i) {
- fill(sampled_edges.begin(), sampled_edges.end(), false);
- // sample derivation top down
- assert(q.empty());
- Hypothesis& hyp = (*hypos)[i];
- SparseVector<double>& deriv_features = hyp.fmap;
- q.push(hg.nodes_.size() - 1);
- prob_t& model_score = hyp.model_score;
- model_score = prob_t::One();
- while(!q.empty()) {
- unsigned cur_node_id = q.front();
- q.pop();
- const Hypergraph::Node& node = hg.nodes_[cur_node_id];
- const unsigned num_in_edges = node.in_edges_.size();
- unsigned sampled_edge_idx = 0;
- if (num_in_edges == 1) {
- sampled_edge_idx = node.in_edges_[0];
- } else {
- assert(num_in_edges > 1);
- ss.clear();
- for (unsigned j = 0; j < num_in_edges; ++j) {
- const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
- prob_t p = edge.edge_prob_; // edge weight
- for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
- p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight
- ss.add(p);
- }
- sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
- }
- sampled_edges[sampled_edge_idx] = true;
- const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
- deriv_features += sampled_edge.feature_values_;
- model_score *= sampled_edge.edge_prob_;
- //sampled_deriv->push_back(sampled_edge_idx);
- for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
- q.push(sampled_edge.tail_nodes_[j]);
- }
- }
- Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
- }
-}
-