summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-07-29 21:04:14 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-07-29 21:04:14 -0400
commit bd897e664a7ca51caa3cd76e444466cc410ac354 (patch)
tree c6e2500d781635f6393c5c66b94e4584136e1de0
parent a9a52d22b824ccec7104036fc746d9cc30155681 (diff)
fix grammar converter to remove edges that cannot exist in any valid derivation
-rw-r--r-- decoder/hg_io.cc 1
-rw-r--r-- decoder/inside_outside.h 4
-rw-r--r-- decoder/rescore_translator.cc 1
-rw-r--r-- training/grammar_convert.cc 27
4 files changed, 29 insertions, 4 deletions
diff --git a/decoder/hg_io.cc b/decoder/hg_io.cc
index bfb2fb80..8bd40387 100644
--- a/decoder/hg_io.cc
+++ b/decoder/hg_io.cc
@@ -261,6 +261,7 @@ static void WriteRule(const TRule& r, ostream* out) {
}
bool HypergraphIO::WriteToJSON(const Hypergraph& hg, bool remove_rules, ostream* out) {
+ if (hg.empty()) { *out << "{}\n"; return true; }
map<const TRule*, int> rid;
ostream& o = *out;
rid[NULL] = 0;
diff --git a/decoder/inside_outside.h b/decoder/inside_outside.h
index bb7f9fcc..f73a1d3f 100644
--- a/decoder/inside_outside.h
+++ b/decoder/inside_outside.h
@@ -41,10 +41,6 @@ WeightType Inside(const Hypergraph& hg,
WeightType* const cur_node_inside_score = &inside_score[i];
Hypergraph::EdgesVector const& in=hg.nodes_[i].in_edges_;
const unsigned num_in_edges = in.size();
- if (num_in_edges == 0) {
- *cur_node_inside_score = WeightType(1); //FIXME: why not call weight(edge) instead?
- continue;
- }
for (unsigned j = 0; j < num_in_edges; ++j) {
const Hypergraph::Edge& edge = hg.edges_[in[j]];
WeightType score = weight(edge);
diff --git a/decoder/rescore_translator.cc b/decoder/rescore_translator.cc
index 5c417393..10192f7a 100644
--- a/decoder/rescore_translator.cc
+++ b/decoder/rescore_translator.cc
@@ -20,6 +20,7 @@ struct RescoreTranslatorImpl {
bool Translate(const string& input,
const vector<double>& weights,
Hypergraph* forest) {
+ if (input == "{}") return false;
if (input.find("{\"rules\"") == 0) {
istringstream is(input);
Hypergraph src_cfg_hg;
diff --git a/training/grammar_convert.cc b/training/grammar_convert.cc
index bf8abb26..607a7cb9 100644
--- a/training/grammar_convert.cc
+++ b/training/grammar_convert.cc
@@ -9,6 +9,7 @@
#include <boost/lexical_cast.hpp>
#include <boost/program_options.hpp>
+#include "inside_outside.h"
#include "tdict.h"
#include "filelib.h"
#include "hg.h"
@@ -69,6 +70,32 @@ void FilterAndCheckCorrectness(int goal, Hypergraph* hg) {
if (hg->nodes_.size() != old_size) {
cerr << "Warning! During sorting " << (old_size - hg->nodes_.size()) << " disappeared!\n";
}
+ vector<double> inside; // inside score at each node
+ double p = Inside<double, TransitionCountWeightFunction>(*hg, &inside);
+ if (!p) {
+ cerr << "Warning! Grammar defines the empty language!\n";
+ hg->clear();
+ return;
+ }
+ vector<bool> prune(hg->edges_.size(), false);
+ int bad_edges = 0;
+ for (unsigned i = 0; i < hg->edges_.size(); ++i) {
+ Hypergraph::Edge& edge = hg->edges_[i];
+ bool bad = false;
+ for (unsigned j = 0; j < edge.tail_nodes_.size(); ++j) {
+ if (!inside[edge.tail_nodes_[j]]) {
+ bad = true;
+ ++bad_edges;
+ }
+ }
+ prune[i] = bad;
+ }
+ cerr << "Removing " << bad_edges << " bad edges from the grammar.\n";
+ for (unsigned i = 0; i < hg->edges_.size(); ++i) {
+ if (prune[i])
+ cerr << " " << hg->edges_[i].rule_->AsString() << endl;
+ }
+ hg->PruneEdges(prune);
}
void CreateEdge(const TRulePtr& r, const Hypergraph::TailNodeVector& tail, Hypergraph::Node* head_node, Hypergraph* hg) {