From 70d40b49fb797c1ac0ede30ff031ccafba740234 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 26 Dec 2013 20:21:06 -0600 Subject: add support for epsilons in input lattice --- decoder/bottom_up_parser.cc | 22 +++++++++++++++++----- decoder/scfg_translator.cc | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/decoder/bottom_up_parser.cc b/decoder/bottom_up_parser.cc index ed79aaf0..606b8d7e 100644 --- a/decoder/bottom_up_parser.cc +++ b/decoder/bottom_up_parser.cc @@ -14,6 +14,8 @@ using namespace std; +static WordID kEPS = 0; + class ActiveChart; class PassiveChart { public: @@ -74,9 +76,12 @@ class ActiveChart { gptr_(g), ant_nodes_(), lattice_cost(0.0) {} void ExtendTerminal(int symbol, float src_cost, vector* out_cell) const { - const GrammarIter* ni = gptr_->Extend(symbol); - if (ni) { - out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost)); + if (symbol == kEPS) { + out_cell->push_back(ActiveItem(gptr_, ant_nodes_, lattice_cost + src_cost)); + } else { + const GrammarIter* ni = gptr_->Extend(symbol); + if (ni) + out_cell->push_back(ActiveItem(ni, ant_nodes_, lattice_cost + src_cost)); } } void ExtendNonTerminal(const Hypergraph* hg, int node_index, vector* out_cell) const { @@ -127,8 +132,10 @@ class ActiveChart { const WordID& f = ai->label; const double& c = ai->cost; const int& len = ai->dist2next; - //VLOG(1) << "F: " << TD::Convert(f) << endl; + //cerr << "F: " << TD::Convert(f) << " dest=" << i << "," << (j+len-1) << endl; const vector& ec = act_chart_(i, j-1); + //cerr << " SRC=" << i << "," << (j-1) << " [ec=" << ec.size() << "]" << endl; + //if (ec.size() > 0) { cerr << " LC=" << ec[0].lattice_cost << endl; } for (vector::const_iterator di = ec.begin(); di != ec.end(); ++di) di->ExtendTerminal(f, c, &act_chart_(i, j + len - 1)); } @@ -166,6 +173,7 @@ void PassiveChart::ApplyRule(const int i, const Hypergraph::TailNodeVector& ant_nodes, const float lattice_cost) { Hypergraph::Edge* new_edge = forest_->AddEdge(r, ant_nodes); + //cerr << i << " " << j << ": APPLYING RULE: " << r->AsString() << endl; new_edge->prev_i_ = r->prev_i; new_edge->prev_j_ = r->prev_j; new_edge->i_ = i; @@ -198,8 +206,11 @@ void PassiveChart::ApplyRules(const int i, const Hypergraph::TailNodeVector& tail, const float lattice_cost) { const int n = rules->GetNumRules(); - for (int k = 0; k < n; ++k) + //cerr << i << " " << j << ": NUM RULES: " << n << endl; + for (int k = 0; k < n; ++k) { + //cerr << i << " " << j << ": R=" << rules->GetIthRule(k)->AsString() << endl; ApplyRule(i, j, rules->GetIthRule(k), tail, lattice_cost); + } } void PassiveChart::ApplyUnaryRules(const int i, const int j) { @@ -284,6 +295,7 @@ ExhaustiveBottomUpParser::ExhaustiveBottomUpParser( bool ExhaustiveBottomUpParser::Parse(const Lattice& input, Hypergraph* forest) const { + kEPS = TD::Convert("*EPS*"); PassiveChart chart(goal_sym_, grammars_, input, forest); const bool result = chart.Parse(); return result; diff --git a/decoder/scfg_translator.cc b/decoder/scfg_translator.cc index a506c591..236d7c90 100644 --- a/decoder/scfg_translator.cc +++ b/decoder/scfg_translator.cc @@ -78,7 +78,7 @@ PassThroughGrammar::PassThroughGrammar(const Lattice& input, const string& cat, } bool PassThroughGrammar::HasRuleForSpan(int, int, int distance) const { - return (distance < 2); + return (distance < 4); // TODO this isn't great, but helps with EPS lattices } struct SCFGTranslatorImpl { -- cgit v1.2.3