From c7d1b04980d9d90458625a7f8e92985c7409a78d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 29 Jul 2012 21:04:14 -0400 Subject: fix grammar converter to remove edges that cannot exist in any valid derivation --- decoder/hg_io.cc | 1 + decoder/inside_outside.h | 4 ---- decoder/rescore_translator.cc | 1 + training/grammar_convert.cc | 27 +++++++++++++++++++++++++++ 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc index bfb2fb80..8bd40387 100644 --- a/decoder/hg_io.cc +++ b/decoder/hg_io.cc @@ -261,6 +261,7 @@ static void WriteRule(const TRule& r, ostream* out) { } bool HypergraphIO::WriteToJSON(const Hypergraph& hg, bool remove_rules, ostream* out) { + if (hg.empty()) { *out << "{}\n"; return true; } map rid; ostream& o = *out; rid[NULL] = 0; diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h index bb7f9fcc..f73a1d3f 100644 --- a/decoder/inside_outside.h +++ b/decoder/inside_outside.h @@ -41,10 +41,6 @@ WeightType Inside(const Hypergraph& hg, WeightType* const cur_node_inside_score = &inside_score[i]; Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_; const unsigned num_in_edges = in.size(); - if (num_in_edges == 0) { - *cur_node_inside_score = WeightType(1); //FIXME: why not call weight(edge) instead? - continue; - } for (unsigned j = 0; j < num_in_edges; ++j) { const Hypergraph::Edge& edge = hg.edges_[in[j]]; WeightType score = weight(edge); diff --git a/decoder/rescore_translator.cc b/decoder/rescore_translator.cc index 5c417393..10192f7a 100644 --- a/decoder/rescore_translator.cc +++ b/decoder/rescore_translator.cc @@ -20,6 +20,7 @@ struct RescoreTranslatorImpl { bool Translate(const string& input, const vector& weights, Hypergraph* forest) { + if (input == "{}") return false; if (input.find("{\"rules\"") == 0) { istringstream is(input); Hypergraph src_cfg_hg; diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc index bf8abb26..607a7cb9 100644 --- a/training/grammar_convert.cc +++ b/training/grammar_convert.cc @@ -9,6 +9,7 @@ #include #include +#include "inside_outside.h" #include "tdict.h" #include "filelib.h" #include "hg.h" @@ -69,6 +70,32 @@ void FilterAndCheckCorrectness(int goal, Hypergraph* hg) { if (hg->nodes_.size() != old_size) { cerr << "Warning! During sorting " << (old_size - hg->nodes_.size()) << " disappeared!\n"; } + vector inside; // inside score at each node + double p = Inside(*hg, &inside); + if (!p) { + cerr << "Warning! Grammar defines the empty language!\n"; + hg->clear(); + return; + } + vector prune(hg->edges_.size(), false); + int bad_edges = 0; + for (unsigned i = 0; i < hg->edges_.size(); ++i) { + Hypergraph::Edge& edge = hg->edges_[i]; + bool bad = false; + for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) { + if (!inside[edge.tail_nodes_[j]]) { + bad = true; + ++bad_edges; + } + } + prune[i] = bad; + } + cerr << "Removing " << bad_edges << " bad edges from the grammar.\n"; + for (unsigned i = 0; i < hg->edges_.size(); ++i) { + if (prune[i]) + cerr << " " << hg->edges_[i].rule_->AsString() << endl; + } + hg->PruneEdges(prune); } void CreateEdge(const TRulePtr& r, const Hypergraph::TailNodeVector& tail, Hypergraph::Node* head_node, Hypergraph* hg) { -- cgit v1.2.3