summaryrefslogtreecommitdiff
path: root/decoder/hg_remove_eps.cc
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cs.cmu.edu>2012-07-28 18:35:11 -0400
committerChris Dyer <cdyer@cs.cmu.edu>2012-07-28 18:35:11 -0400
commitdf4a193f16f6966c83c608adbb5b0680a626ce9f (patch)
tree09f414e8536b1d3fedd7fe75e3a41a59d7a9c394 /decoder/hg_remove_eps.cc
parentec7b8d08993072030e4749d850ead9edaf9deb5c (diff)
slight improvement to the way NTs are handled in the FST-CFG composition algorithm, new rescoring mode
Diffstat (limited to 'decoder/hg_remove_eps.cc')
-rw-r--r--decoder/hg_remove_eps.cc91
1 files changed, 91 insertions, 0 deletions
diff --git a/decoder/hg_remove_eps.cc b/decoder/hg_remove_eps.cc
new file mode 100644
index 00000000..050c4876
--- /dev/null
+++ b/decoder/hg_remove_eps.cc
@@ -0,0 +1,91 @@
+#include "hg_remove_eps.h"
+
+#include <cassert>
+
+#include "trule.h"
+#include "hg.h"
+
+using namespace std;
+
+namespace {
+ TRulePtr kEPSRule;
+ TRulePtr kUnaryRule;
+
+ TRulePtr CreateUnaryRule(int lhs, int rhs) {
+ if (!kUnaryRule) kUnaryRule.reset(new TRule("[X] ||| [X,1] ||| [X,1]"));
+ TRule* r = new TRule(*kUnaryRule);
+ assert(lhs < 0);
+ assert(rhs < 0);
+ r->lhs_ = lhs;
+ r->f_[0] = rhs;
+ return TRulePtr(r);
+ }
+
+ TRulePtr CreateEpsilonRule(int lhs, WordID eps) {
+ if (!kEPSRule) kEPSRule.reset(new TRule("[X] ||| <eps> ||| <eps>"));
+ TRule* r = new TRule(*kEPSRule);
+ r->lhs_ = lhs;
+ assert(lhs < 0);
+ assert(eps > 0);
+ r->e_[0] = eps;
+ r->f_[0] = eps;
+ return TRulePtr(r);
+ }
+}
+
+void RemoveEpsilons(Hypergraph* g, WordID eps) {
+ vector<bool> kill(g->edges_.size(), false);
+ for (unsigned i = 0; i < g->edges_.size(); ++i) {
+ const Hypergraph::Edge& edge = g->edges_[i];
+ if (edge.tail_nodes_.empty() &&
+ edge.rule_->f_.size() == 1 &&
+ edge.rule_->f_[0] == eps) {
+ kill[i] = true;
+ if (!edge.feature_values_.empty()) {
+ Hypergraph::Node& node = g->nodes_[edge.head_node_];
+ if (node.in_edges_.size() != 1) {
+ cerr << "[WARNING] <eps> edge with features going into non-empty node - can't promote\n";
+ // this *probably* means that there are multiple derivations of the
+ // same sequence via different paths through the input forest
+ // this needs to be investigated and fixed
+ } else {
+ for (unsigned j = 0; j < node.out_edges_.size(); ++j)
+ g->edges_[node.out_edges_[j]].feature_values_ += edge.feature_values_;
+ // cerr << "PROMOTED " << edge.feature_values_ << endl;
+ }
+ }
+ }
+ }
+ bool created_eps = false;
+ g->PruneEdges(kill);
+ for (unsigned i = 0; i < g->nodes_.size(); ++i) {
+ const Hypergraph::Node& node = g->nodes_[i];
+ if (node.in_edges_.empty()) {
+ for (unsigned j = 0; j < node.out_edges_.size(); ++j) {
+ Hypergraph::Edge& edge = g->edges_[node.out_edges_[j]];
+ const int lhs = edge.rule_->lhs_;
+ if (edge.rule_->Arity() == 2) {
+ assert(edge.rule_->f_.size() == 2);
+ assert(edge.rule_->e_.size() == 2);
+ unsigned cur = node.id_;
+ int t = -1;
+ assert(edge.tail_nodes_.size() == 2);
+ int rhs = 0;
+ for (unsigned i = 0; i < 2u; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; rhs = edge.rule_->f_[i]; }
+ assert(t != -1);
+ edge.tail_nodes_.resize(1);
+ edge.tail_nodes_[0] = t;
+ edge.rule_ = CreateUnaryRule(lhs, rhs);
+ } else {
+ edge.rule_ = CreateEpsilonRule(lhs, eps);
+ edge.tail_nodes_.clear();
+ created_eps = true;
+ }
+ }
+ }
+ }
+ vector<bool> k2(g->edges_.size(), false);
+ g->PruneEdges(k2);
+ if (created_eps) RemoveEpsilons(g, eps);
+}
+