summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Dyer <redpony@gmail.com>2014-04-25 23:45:32 -0400
committerChris Dyer <redpony@gmail.com>2014-04-25 23:45:32 -0400
commit1400adb9f5e8ebf8ea1658d658893f0731b6fc22 (patch)
treea6d9ff65a7cca2b4326640a2fc2929e3a3063e0a
parent94d515b00d66d9ea18758b951c1ce4789f3875b0 (diff)
check for non-rescorable hypergraphs
-rw-r--r--decoder/decoder.cc5
-rw-r--r--decoder/hg.cc7
-rw-r--r--decoder/hg.h4
-rw-r--r--tests/system_tests/cfg_rescore/input.cfg5
-rw-r--r--tests/system_tests/cfg_rescore/input.txt2
-rw-r--r--training/utils/grammar_convert.cc16
6 files changed, 31 insertions, 8 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc
index 354ea2d9..41f36822 100644
--- a/decoder/decoder.cc
+++ b/decoder/decoder.cc
@@ -755,6 +755,11 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) {
if (!SILENT) cerr << " *** NODES NOT UNIQUELY IDENTIFIED ***\n";
}
+ if (!forest.ArePreGoalEdgesArity1()) {
+ cerr << "Pre-goal edges are not arity-1. The decoder requires this.\n";
+ abort();
+ }
+
const bool show_tree_structure=conf.count("show_tree_structure");
if (!SILENT) forest_stats(forest," Init. forest",show_tree_structure,oracle.show_derivation);
if (conf.count("show_expected_length")) {
diff --git a/decoder/hg.cc b/decoder/hg.cc
index e456fa7c..46543b01 100644
--- a/decoder/hg.cc
+++ b/decoder/hg.cc
@@ -28,6 +28,13 @@ bool Hypergraph::AreNodesUniquelyIdentified() const {
return true;
}
+bool Hypergraph::ArePreGoalEdgesArity1() const {
+ auto& n = nodes_.back();
+ for (auto eid : n.in_edges_)
+ if (edges_[eid].Arity() != 1) return false;
+ return true;
+}
+
Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges() {
NodeProbs nv;
ComputeNodeViterbi(&nv);
diff --git a/decoder/hg.h b/decoder/hg.h
index 43fb275b..4ed27d87 100644
--- a/decoder/hg.h
+++ b/decoder/hg.h
@@ -268,6 +268,10 @@ public:
// if all node states are unique, return true
bool AreNodesUniquelyIdentified() const;
+ // the feature function interface assumes that pre-goal edges are
+ // arity 1 (this simplifies the "final transition" feature computation)
+ bool ArePreGoalEdgesArity1() const;
+
// reserves space in the nodes vector to prevent memory locations
// from changing
void ReserveNodes(size_t n, size_t e = 0) {
diff --git a/tests/system_tests/cfg_rescore/input.cfg b/tests/system_tests/cfg_rescore/input.cfg
index 0073cb7b..75602c75 100644
--- a/tests/system_tests/cfg_rescore/input.cfg
+++ b/tests/system_tests/cfg_rescore/input.cfg
@@ -1,9 +1,8 @@
-[S] ||| [S1]
-[S1] ||| [NP1] [VP] ||| Active=1
+[S] ||| [NP1] [VP] ||| Active=1
+[S] ||| [NP2] [VPSV] by [NP1] ||| Passive=1
[VP] ||| [V] [NP2]
[V] ||| ate
[VPSV] ||| was eaten
-[S1] ||| [NP2] [VPSV] by [NP1] ||| Passive=1
[NP1] ||| John
[NP2] ||| broccoli
[NP2] ||| the broccoli ||| Definite=1
diff --git a/tests/system_tests/cfg_rescore/input.txt b/tests/system_tests/cfg_rescore/input.txt
index 71fc26bc..2999a5fb 100644
--- a/tests/system_tests/cfg_rescore/input.txt
+++ b/tests/system_tests/cfg_rescore/input.txt
@@ -1 +1 @@
-{"rules":[1,"[S] ||| [S1] ||| [1]",2,"[S1] ||| [NP1] [VP] ||| [1] [2] ||| Active=1",3,"[VP] ||| [V] [NP2] ||| [1] [2]",4,"[V] ||| ate ||| ate",5,"[VPSV] ||| was eaten ||| was eaten",6,"[S1] ||| [NP2] [VPSV] by [NP1] ||| [1] [2] by [3] ||| Passive=1",7,"[NP1] ||| John ||| John",8,"[NP2] ||| broccoli ||| broccoli",9,"[NP2] ||| the broccoli ||| the broccoli ||| Definite=1"],"features":["PhraseModel_0","PhraseModel_1","PhraseModel_2","PhraseModel_3","PhraseModel_4","PhraseModel_5","PhraseModel_6","PhraseModel_7","PhraseModel_8","PhraseModel_9","PhraseModel_10","PhraseModel_11","PhraseModel_12","PhraseModel_13","PhraseModel_14","PhraseModel_15","PhraseModel_16","PhraseModel_17","PhraseModel_18","PhraseModel_19","PhraseModel_20","PhraseModel_21","PhraseModel_22","PhraseModel_23","PhraseModel_24","PhraseModel_25","PhraseModel_26","PhraseModel_27","PhraseModel_28","PhraseModel_29","PhraseModel_30","PhraseModel_31","PhraseModel_32","PhraseModel_33","PhraseModel_34","PhraseModel_35","PhraseModel_36","PhraseModel_37","PhraseModel_38","PhraseModel_39","PhraseModel_40","PhraseModel_41","PhraseModel_42","PhraseModel_43","PhraseModel_44","PhraseModel_45","PhraseModel_46","PhraseModel_47","PhraseModel_48","PhraseModel_49","PhraseModel_50","PhraseModel_51","PhraseModel_52","PhraseModel_53","PhraseModel_54","PhraseModel_55","PhraseModel_56","PhraseModel_57","PhraseModel_58","PhraseModel_59","PhraseModel_60","PhraseModel_61","PhraseModel_62","PhraseModel_63","PhraseModel_64","PhraseModel_65","PhraseModel_66","PhraseModel_67","PhraseModel_68","PhraseModel_69","PhraseModel_70","PhraseModel_71","PhraseModel_72","PhraseModel_73","PhraseModel_74","PhraseModel_75","PhraseModel_76","PhraseModel_77","PhraseModel_78","PhraseModel_79","PhraseModel_80","PhraseModel_81","PhraseModel_82","PhraseModel_83","PhraseModel_84","PhraseModel_85","PhraseModel_86","PhraseModel_87","PhraseModel_88","PhraseModel_89","PhraseModel_90","PhraseModel_91","PhraseModel_92","PhraseModel_93","PhraseModel_94","PhraseModel_95","PhraseModel_96","PhraseModel_97","PhraseModel_98","PhraseModel_99","Active","Passive","Definite"],"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":7}],"node":{"in_edges":[0],"cat":"NP1","node_hash":"0000000000000007"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":4}],"node":{"in_edges":[1],"cat":"V","node_hash":"0000000000000004"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":8},{"tail":[],"spans":[-1,-1,-1,-1],"feats":[102,1],"rule":9}],"node":{"in_edges":[2,3],"cat":"NP2","node_hash":"0000000000000009"},"edges":[{"tail":[1,2],"spans":[-1,-1,-1,-1],"feats":[],"rule":3}],"node":{"in_edges":[4],"cat":"VP","node_hash":"0000000000000003"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":5}],"node":{"in_edges":[5],"cat":"VPSV","node_hash":"0000000000000005"},"edges":[{"tail":[0,3],"spans":[-1,-1,-1,-1],"feats":[100,1],"rule":2},{"tail":[2,4,0],"spans":[-1,-1,-1,-1],"feats":[101,1],"rule":6}],"node":{"in_edges":[6,7],"cat":"S1","node_hash":"0000000000000006"},"edges":[{"tail":[5],"spans":[-1,-1,-1,-1],"feats":[],"rule":1}],"node":{"in_edges":[8],"cat":"S","node_hash":"0000000000000001"}}
+{"rules":[1,"[S] ||| [NP1] [VP] ||| [1] [2] ||| Active=1",2,"[S] ||| [NP2] [VPSV] by [NP1] ||| [1] [2] by [3] ||| Passive=1",3,"[VP] ||| [V] [NP2] ||| [1] [2]",4,"[V] ||| ate ||| ate",5,"[VPSV] ||| was eaten ||| was eaten",6,"[NP1] ||| John ||| John",7,"[NP2] ||| broccoli ||| broccoli",8,"[NP2] ||| the broccoli ||| the broccoli ||| Definite=1",9,"[Goal] ||| [X] ||| [1]"],"features":["PhraseModel_0","PhraseModel_1","PhraseModel_2","PhraseModel_3","PhraseModel_4","PhraseModel_5","PhraseModel_6","PhraseModel_7","PhraseModel_8","PhraseModel_9","PhraseModel_10","PhraseModel_11","PhraseModel_12","PhraseModel_13","PhraseModel_14","PhraseModel_15","PhraseModel_16","PhraseModel_17","PhraseModel_18","PhraseModel_19","PhraseModel_20","PhraseModel_21","PhraseModel_22","PhraseModel_23","PhraseModel_24","PhraseModel_25","PhraseModel_26","PhraseModel_27","PhraseModel_28","PhraseModel_29","PhraseModel_30","PhraseModel_31","PhraseModel_32","PhraseModel_33","PhraseModel_34","PhraseModel_35","PhraseModel_36","PhraseModel_37","PhraseModel_38","PhraseModel_39","PhraseModel_40","PhraseModel_41","PhraseModel_42","PhraseModel_43","PhraseModel_44","PhraseModel_45","PhraseModel_46","PhraseModel_47","PhraseModel_48","PhraseModel_49","PhraseModel_50","PhraseModel_51","PhraseModel_52","PhraseModel_53","PhraseModel_54","PhraseModel_55","PhraseModel_56","PhraseModel_57","PhraseModel_58","PhraseModel_59","PhraseModel_60","PhraseModel_61","PhraseModel_62","PhraseModel_63","PhraseModel_64","PhraseModel_65","PhraseModel_66","PhraseModel_67","PhraseModel_68","PhraseModel_69","PhraseModel_70","PhraseModel_71","PhraseModel_72","PhraseModel_73","PhraseModel_74","PhraseModel_75","PhraseModel_76","PhraseModel_77","PhraseModel_78","PhraseModel_79","PhraseModel_80","PhraseModel_81","PhraseModel_82","PhraseModel_83","PhraseModel_84","PhraseModel_85","PhraseModel_86","PhraseModel_87","PhraseModel_88","PhraseModel_89","PhraseModel_90","PhraseModel_91","PhraseModel_92","PhraseModel_93","PhraseModel_94","PhraseModel_95","PhraseModel_96","PhraseModel_97","PhraseModel_98","PhraseModel_99","Active","Passive","Definite"],"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":6}],"node":{"in_edges":[0],"cat":"NP1","node_hash":"0000000000000006"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":4}],"node":{"in_edges":[1],"cat":"V","node_hash":"0000000000000004"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":7},{"tail":[],"spans":[-1,-1,-1,-1],"feats":[102,1],"rule":8}],"node":{"in_edges":[2,3],"cat":"NP2","node_hash":"0000000000000008"},"edges":[{"tail":[1,2],"spans":[-1,-1,-1,-1],"feats":[],"rule":3}],"node":{"in_edges":[4],"cat":"VP","node_hash":"0000000000000003"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":5}],"node":{"in_edges":[5],"cat":"VPSV","node_hash":"0000000000000005"},"edges":[{"tail":[0,3],"spans":[-1,-1,-1,-1],"feats":[100,1],"rule":1},{"tail":[2,4,0],"spans":[-1,-1,-1,-1],"feats":[101,1],"rule":2}],"node":{"in_edges":[6,7],"cat":"S","node_hash":"0000000000000002"},"edges":[{"tail":[5],"spans":[-1,-1,-1,-1],"feats":[],"rule":9}],"node":{"in_edges":[8],"cat":"Goal","node_hash":"000000000000003D"}}
diff --git a/training/utils/grammar_convert.cc b/training/utils/grammar_convert.cc
index 58d1957c..5c1b4d4a 100644
--- a/training/utils/grammar_convert.cc
+++ b/training/utils/grammar_convert.cc
@@ -56,15 +56,22 @@ int GetOrCreateNode(const WordID& lhs, map<WordID, int>* lhs2node, Hypergraph* h
return node_id - 1;
}
+void AddDummyGoalNode(Hypergraph* hg) {
+ static const int kGOAL = -TD::Convert("Goal");
+ static TRulePtr kGOAL_RULE(new TRule("[Goal] ||| [X] ||| [1]"));
+ unsigned old_goal_node_idx = hg->nodes_.size() - 1;
+ HG::Node* goal_node = hg->AddNode(kGOAL);
+ goal_node->node_hash = goal_node->id_ * 10 + 1;
+ TailNodeVector tail(1, old_goal_node_idx);
+ HG::Edge* new_edge = hg->AddEdge(kGOAL_RULE, tail);
+ hg->ConnectEdgeToHeadNode(new_edge, goal_node);
+}
+
void FilterAndCheckCorrectness(int goal, Hypergraph* hg) {
if (goal < 0) {
cerr << "Error! [S] not found in grammar!\n";
exit(1);
}
- if (hg->nodes_[goal].in_edges_.size() != 1) {
- cerr << "Error! [S] has more than one rewrite!\n";
- exit(1);
- }
int old_size = hg->nodes_.size();
hg->TopologicallySortNodesAndEdges(goal);
if (hg->nodes_.size() != old_size) {
@@ -319,6 +326,7 @@ int main(int argc, char **argv) {
if (line.empty()) {
int goal = lhs2node[kSTART] - 1;
FilterAndCheckCorrectness(goal, &hg);
+ AddDummyGoalNode(&hg);
ProcessHypergraph(w, conf, "", &hg);
hg.clear();
lhs2node.clear();