summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu> 2011-03-24 18:04:06 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu> 2011-03-24 18:04:06 -0400
commit: afb41a09cc10db8b47047630c8db3148dfa5f648 (patch)
tree: 971a3de0c97384e38f38906a03248630ab38113d
parent: fa45c8489b2f016766a7a946eb7ba2b9140749ed (diff)
various summary feature types, part 1
-rw-r--r-- decoder/decoder.cc 108
1 files changed, 73 insertions, 35 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index b7774acc..fdaf8cb1 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -66,6 +66,13 @@ void DecoderObserver::NotifyAlignmentFailure(const SentenceMetadata&) {}
void DecoderObserver::NotifyAlignmentForest(const SentenceMetadata&, Hypergraph*) {}
void DecoderObserver::NotifyDecodingComplete(const SentenceMetadata&) {}
+enum SummaryFeature {  // Selects how the per-pass summary feature value is computed; parsed from the "summary_feature_type" option in DecoderImpl::Decode.
+ kNODE_RISK = 1,  // "node_risk" (the option's default): every in-edge of a node gets that node's posterior, i.e. the sum of its in-edge posteriors each divided by z.
+ kEDGE_RISK,  // "edge_risk": each edge gets its own posterior, posts[i] / z, from forest.ComputeEdgePosteriors.
+ kEDGE_PROB  // "edge_prob": each edge gets log(edge_prob_) as left by forest.PushWeightsToGoal(1.0); weights are then reset via forest.Reweight.
+};
+
+
struct ELengthWeightFunction {
double operator()(const Hypergraph::Edge& e) const {
return e.rule_->ELength() - e.rule_->Arity();
@@ -364,6 +371,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("feature_function,F",po::value<vector<string> >()->composing(), "Pass 1 additional feature function(s) (-L for list)")
("intersection_strategy,I",po::value<string>()->default_value("cube_pruning"), "Pass 1 intersection strategy for incorporating finite-state features; values include Cube_pruning, Full")
("summary_feature", po::value<string>(), "Compute a 'summary feature' at the end of the pass (before any pruning) with name=arg and value=inside-outside/Z")
+ ("summary_feature_type", po::value<string>()->default_value("node_risk"), "Summary feature types: node_risk, edge_risk, edge_prob")
("density_prune", po::value<double>(), "Pass 1 pruning: keep no more than this many times the number of edges used in the best derivation tree (>=1.0)")
("beam_prune", po::value<double>(), "Pass 1 pruning: Prune paths from scored forest, keep paths within exp(alpha>=0)")
@@ -386,8 +394,8 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream
("apply_fsa_by",po::value<string>()->default_value("BU_CUBE"), "Method for applying fsa_feature_functions - BU_FULL BU_CUBE EARLEY") //+ApplyFsaBy::all_names()
#endif
("add_pass_through_rules,P","Add rules to translate OOV words as themselves")
- ("k_best,k",po::value<int>(),"Extract the k best derivations")
- ("unique_k_best,r", "Unique k-best translation list")
+ ("k_best,k",po::value<int>(),"Extract the k best derivations")
+ ("unique_k_best,r", "Unique k-best translation list")
("cubepruning_pop_limit,K",po::value<int>()->default_value(200), "Max number of pops from the candidate heap at each node")
("aligner,a", "Run as a word/phrase aligner (src & ref required)")
("aligner_use_viterbi", "If run in alignment mode, compute the Viterbi (rather than MAP) alignment")
@@ -775,6 +783,18 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
cerr << " Expected length (words): " << res.r / res.p << "\t" << res << endl;
}
+ SummaryFeature summary_feature_type = kNODE_RISK;
+ if (conf["summary_feature_type"].as<string>() == "edge_risk")
+ summary_feature_type = kEDGE_RISK;
+ else if (conf["summary_feature_type"].as<string>() == "node_risk")
+ summary_feature_type = kNODE_RISK;
+ else if (conf["summary_feature_type"].as<string>() == "edge_prob")
+ summary_feature_type = kEDGE_PROB;
+ else {
+ cerr << "Bad summary_feature_type: " << conf["summary_feature_type"].as<string>() << endl;
+ abort();
+ }
+
for (int pass = 0; pass < rescoring_passes.size(); ++pass) {
const RescoringPass& rp = rescoring_passes[pass];
const vector<double>& cur_weights = rp.weight_vector;
@@ -806,43 +826,61 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
}
if (rp.fid_summary) {
-#if 0
- const prob_t z = forest.PushWeightsToGoal(1.0);
- if (!SILENT) { cerr << " " << passtr << " adding summary feature " << FD::Convert(rp.fid_summary) << " log(Z)=" << log(z) << endl; }
- if (!isfinite(log(z)) || isnan(log(z))) {
- cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
- } else {
- for (int i = 0; i < forest.edges_.size(); ++i) {
- const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
- // head node by forest.PushWeightsToGoal
- if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
- cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
- abort();
+ if (summary_feature_type == kEDGE_PROB) {
+ const prob_t z = forest.PushWeightsToGoal(1.0);
+ if (!isfinite(log(z)) || isnan(log(z))) {
+ cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
+ } else {
+ for (int i = 0; i < forest.edges_.size(); ++i) {
+ const double log_prob_transition = log(forest.edges_[i].edge_prob_); // locally normalized by the edge
+ // head node by forest.PushWeightsToGoal
+ if (!isfinite(log_prob_transition) || isnan(log_prob_transition)) {
+ cerr << "Edge: i=" << i << " got bad inside prob: " << *forest.edges_[i].rule_ << endl;
+ abort();
+ }
+
+ forest.edges_[i].feature_values_.set_value(rp.fid_summary, log_prob_transition);
}
-
- forest.edges_[i].feature_values_.set_value(rp.fid_summary, log_prob_transition);
+ forest.Reweight(cur_weights); // reset weights
}
- forest.Reweight(cur_weights); // reset weights
- }
-#endif
- Hypergraph::EdgeProbs posts;
- const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
- if (!isfinite(log(z)) || isnan(log(z))) {
- cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
- } else {
- for (int i = 0; i < forest.nodes_.size(); ++i) {
- const Hypergraph::EdgesVector& in_edges = forest.nodes_[i].in_edges_;
- prob_t node_post = prob_t(0);
- for (int j = 0; j < in_edges.size(); ++j)
- node_post += (posts[in_edges[j]] / z);
- const double log_np = log(node_post);
- if (!isfinite(log_np) || isnan(log_np)) {
- cerr << "got bad posterior prob for node " << i << endl;
- abort();
+ } else if (summary_feature_type == kNODE_RISK) {
+ Hypergraph::EdgeProbs posts;
+ const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
+ if (!isfinite(log(z)) || isnan(log(z))) {
+ cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
+ } else {
+ for (int i = 0; i < forest.nodes_.size(); ++i) {
+ const Hypergraph::EdgesVector& in_edges = forest.nodes_[i].in_edges_;
+ prob_t node_post = prob_t(0);
+ for (int j = 0; j < in_edges.size(); ++j)
+ node_post += (posts[in_edges[j]] / z);
+ const double log_np = log(node_post);
+ if (!isfinite(log_np) || isnan(log_np)) {
+ cerr << "got bad posterior prob for node " << i << endl;
+ abort();
+ }
+ for (int j = 0; j < in_edges.size(); ++j)
+ forest.edges_[in_edges[j]].feature_values_.set_value(rp.fid_summary, exp(log_np));
}
- for (int j = 0; j < in_edges.size(); ++j)
- forest.edges_[in_edges[j]].feature_values_.set_value(rp.fid_summary, exp(log_np));
}
+ } else if (summary_feature_type == kEDGE_RISK) {
+ Hypergraph::EdgeProbs posts;
+ const prob_t z = forest.ComputeEdgePosteriors(1.0, &posts);
+ if (!isfinite(log(z)) || isnan(log(z))) {
+ cerr << " " << passtr << " !!! Invalid partition detected, abandoning.\n";
+ } else {
+ assert(posts.size() == forest.edges_.size());
+ for (int i = 0; i < posts.size(); ++i) {
+ const double log_np = log(posts[i] / z);
+ if (!isfinite(log_np) || isnan(log_np)) {
+ cerr << "got bad posterior prob for node " << i << endl;
+ abort();
+ }
+ forest.edges_[i].feature_values_.set_value(rp.fid_summary, exp(log_np));
+ }
+ }
+ } else {
+ assert(!"shouldn't happen");
}
}