1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
#include "hg_sampler.h"
#include <queue>
#include "viterbi.h"
#include "inside_outside.h"
using namespace std;
// Edge weight functor that scores an edge 1.0 when its id is flagged in the
// supplied bit vector and 0.0 otherwise.
//
// Only a reference to the vector is stored, so the vector must outlive this
// object.
struct SampledDerivationWeightFunction {
  typedef double Weight;

  // Flags indexed by edge id (e.id_); true marks an edge of the sampled derivation.
  const vector<bool>& sampled_edges;

  explicit SampledDerivationWeightFunction(const vector<bool>& sampled)
      : sampled_edges(sampled) {}

  // Returns 1.0 if edge e is flagged, 0.0 if it is not.
  double operator()(const Hypergraph::Edge& e) const {
    const bool is_flagged = sampled_edges[e.id_];
    return is_flagged ? 1.0 : 0.0;
  }
};
void HypergraphSampler::sample_hypotheses(const Hypergraph& hg,
unsigned n,
MT19937* rng,
vector<Hypothesis>* hypos) {
hypos->clear();
hypos->resize(n);
// compute inside probabilities
vector<prob_t> node_probs;
Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb());
vector<bool> sampled_edges(hg.edges_.size());
queue<unsigned> q;
SampleSet<prob_t> ss;
for (unsigned i = 0; i < n; ++i) {
fill(sampled_edges.begin(), sampled_edges.end(), false);
// sample derivation top down
assert(q.empty());
Hypothesis& hyp = (*hypos)[i];
SparseVector<double>& deriv_features = hyp.fmap;
q.push(hg.nodes_.size() - 1);
prob_t& model_score = hyp.model_score;
model_score = prob_t::One();
while(!q.empty()) {
unsigned cur_node_id = q.front();
q.pop();
const Hypergraph::Node& node = hg.nodes_[cur_node_id];
const unsigned num_in_edges = node.in_edges_.size();
unsigned sampled_edge_idx = 0;
if (num_in_edges == 1) {
sampled_edge_idx = node.in_edges_[0];
} else {
assert(num_in_edges > 1);
ss.clear();
for (unsigned j = 0; j < num_in_edges; ++j) {
const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
prob_t p = edge.edge_prob_; // edge weight
for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
p *= node_probs[edge.tail_nodes_[k]]; // tail node inside weight
ss.add(p);
}
sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
}
sampled_edges[sampled_edge_idx] = true;
const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
deriv_features += sampled_edge.feature_values_;
model_score *= sampled_edge.edge_prob_;
//sampled_deriv->push_back(sampled_edge_idx);
for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
q.push(sampled_edge.tail_nodes_[j]);
}
}
Viterbi(hg, &hyp.words, ESentenceTraversal(), SampledDerivationWeightFunction(sampled_edges));
}
}
// Draws n derivations from hg and stores a string rendering of each in *trees.
//
// hg     hypergraph to sample from; every walk starts at its last node
//        (index nodes_.size() - 1).
// n      number of samples to draw.
// rng    random source; SelectSample() picks among competing incoming edges.
// trees  output; cleared and resized to n. Entry i holds TD::GetString() of
//        the Viterbi/ETreeTraversal output for the i-th sampled derivation.
void HypergraphSampler::sample_trees(const Hypergraph& hg,
                                     unsigned n,
                                     MT19937* rng,
                                     vector<string>* trees) {
  trees->clear();
  trees->resize(n);

  // compute inside probabilities
  vector<prob_t> node_probs;
  Inside<prob_t, EdgeProb>(hg, &node_probs, EdgeProb());

  // Scratch state reused across samples.
  vector<bool> sampled_edges(hg.edges_.size());
  queue<unsigned> q;
  SampleSet<prob_t> ss;

  for (unsigned i = 0; i < n; ++i) {
    fill(sampled_edges.begin(), sampled_edges.end(), false);

    // sample derivation top down
    assert(q.empty());
    q.push(hg.nodes_.size() - 1);
    while (!q.empty()) {
      const unsigned cur_node_id = q.front();
      q.pop();
      const Hypergraph::Node& node = hg.nodes_[cur_node_id];
      const unsigned num_in_edges = node.in_edges_.size();
      unsigned sampled_edge_idx = 0;
      if (num_in_edges == 1) {
        // Single incoming edge: nothing to choose.
        sampled_edge_idx = node.in_edges_[0];
      } else {
        assert(num_in_edges > 1);
        ss.clear();
        for (unsigned j = 0; j < num_in_edges; ++j) {
          const Hypergraph::Edge& edge = hg.edges_[node.in_edges_[j]];
          prob_t p = edge.edge_prob_;  // edge weight
          for (unsigned k = 0; k < edge.tail_nodes_.size(); ++k)
            p *= node_probs[edge.tail_nodes_[k]];  // tail node inside weight
          ss.add(p);
        }
        sampled_edge_idx = node.in_edges_[rng->SelectSample(ss)];
      }
      sampled_edges[sampled_edge_idx] = true;

      // Continue the walk through every tail node of the chosen edge.
      const Hypergraph::Edge& sampled_edge = hg.edges_[sampled_edge_idx];
      for (unsigned j = 0; j < sampled_edge.tail_nodes_.size(); ++j) {
        q.push(sampled_edge.tail_nodes_[j]);
      }
    }

    // Viterbi with a 1/0 weight on sampled/unsampled edges produces the token
    // sequence for this sample, which is then converted to a string.
    vector<WordID> tmp;
    Viterbi(hg, &tmp, ETreeTraversal(), SampledDerivationWeightFunction(sampled_edges));
    // Store at slot i; index n would be one past the end of the resized vector.
    (*trees)[i] = TD::GetString(tmp);
  }
}
|