From 16d0e2a34df3e32e8992aeda3ad2de7a6e525f14 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 28 Jul 2012 18:35:11 -0400 Subject: slight improvement to the way NTs are handled in the FST-CFG composition algorithm, new rescoring mode --- decoder/hg_remove_eps.cc | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 decoder/hg_remove_eps.cc (limited to 'decoder/hg_remove_eps.cc') diff --git a/decoder/hg_remove_eps.cc b/decoder/hg_remove_eps.cc new file mode 100644 index 00000000..050c4876 --- /dev/null +++ b/decoder/hg_remove_eps.cc @@ -0,0 +1,91 @@ +#include "hg_remove_eps.h" + +#include + +#include "trule.h" +#include "hg.h" + +using namespace std; + +namespace { + TRulePtr kEPSRule; + TRulePtr kUnaryRule; + + TRulePtr CreateUnaryRule(int lhs, int rhs) { + if (!kUnaryRule) kUnaryRule.reset(new TRule("[X] ||| [X,1] ||| [X,1]")); + TRule* r = new TRule(*kUnaryRule); + assert(lhs < 0); + assert(rhs < 0); + r->lhs_ = lhs; + r->f_[0] = rhs; + return TRulePtr(r); + } + + TRulePtr CreateEpsilonRule(int lhs, WordID eps) { + if (!kEPSRule) kEPSRule.reset(new TRule("[X] ||| ||| ")); + TRule* r = new TRule(*kEPSRule); + r->lhs_ = lhs; + assert(lhs < 0); + assert(eps > 0); + r->e_[0] = eps; + r->f_[0] = eps; + return TRulePtr(r); + } +} + +void RemoveEpsilons(Hypergraph* g, WordID eps) { + vector kill(g->edges_.size(), false); + for (unsigned i = 0; i < g->edges_.size(); ++i) { + const Hypergraph::Edge& edge = g->edges_[i]; + if (edge.tail_nodes_.empty() && + edge.rule_->f_.size() == 1 && + edge.rule_->f_[0] == eps) { + kill[i] = true; + if (!edge.feature_values_.empty()) { + Hypergraph::Node& node = g->nodes_[edge.head_node_]; + if (node.in_edges_.size() != 1) { + cerr << "[WARNING] edge with features going into non-empty node - can't promote\n"; + // this *probably* means that there are multiple derivations of the + // same sequence via different paths through the input forest + // this needs to be investigated and fixed + } else { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) + g->edges_[node.out_edges_[j]].feature_values_ += edge.feature_values_; + // cerr << "PROMOTED " << edge.feature_values_ << endl; + } + } + } + } + bool created_eps = false; + g->PruneEdges(kill); + for (unsigned i = 0; i < g->nodes_.size(); ++i) { + const Hypergraph::Node& node = g->nodes_[i]; + if (node.in_edges_.empty()) { + for (unsigned j = 0; j < node.out_edges_.size(); ++j) { + Hypergraph::Edge& edge = g->edges_[node.out_edges_[j]]; + const int lhs = edge.rule_->lhs_; + if (edge.rule_->Arity() == 2) { + assert(edge.rule_->f_.size() == 2); + assert(edge.rule_->e_.size() == 2); + unsigned cur = node.id_; + int t = -1; + assert(edge.tail_nodes_.size() == 2); + int rhs = 0; + for (unsigned i = 0; i < 2u; ++i) if (edge.tail_nodes_[i] != cur) { t = edge.tail_nodes_[i]; rhs = edge.rule_->f_[i]; } + assert(t != -1); + edge.tail_nodes_.resize(1); + edge.tail_nodes_[0] = t; + edge.rule_ = CreateUnaryRule(lhs, rhs); + } else { + edge.rule_ = CreateEpsilonRule(lhs, eps); + edge.tail_nodes_.clear(); + created_eps = true; + } + } + } + } + vector k2(g->edges_.size(), false); + g->PruneEdges(k2); + if (created_eps) RemoveEpsilons(g, eps); +} + -- cgit v1.2.3