From 6a3e80bb4b2a6bb10899183299d14fc070db1654 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 1 Apr 2014 00:53:00 -0400 Subject: tree2string test, fix for edge case --- tests/system_tests/t2s/cdec.ini | 2 ++ tests/system_tests/t2s/gold.statistics | 3 +++ tests/system_tests/t2s/gold.stdout | 1 + tests/system_tests/t2s/grammar.t2s | 8 ++++++++ tests/system_tests/t2s/input.txt | 1 + tests/system_tests/t2s/weights | 6 ++++++ 6 files changed, 21 insertions(+) create mode 100644 tests/system_tests/t2s/cdec.ini create mode 100644 tests/system_tests/t2s/gold.statistics create mode 100644 tests/system_tests/t2s/gold.stdout create mode 100644 tests/system_tests/t2s/grammar.t2s create mode 100644 tests/system_tests/t2s/input.txt create mode 100644 tests/system_tests/t2s/weights (limited to 'tests/system_tests') diff --git a/tests/system_tests/t2s/cdec.ini b/tests/system_tests/t2s/cdec.ini new file mode 100644 index 00000000..ad83438f --- /dev/null +++ b/tests/system_tests/t2s/cdec.ini @@ -0,0 +1,2 @@ +formalism=t2s +grammar=grammar.t2s diff --git a/tests/system_tests/t2s/gold.statistics b/tests/system_tests/t2s/gold.statistics new file mode 100644 index 00000000..452cc93e --- /dev/null +++ b/tests/system_tests/t2s/gold.statistics @@ -0,0 +1,3 @@ +-lm_nodes 6 +-lm_edges 8 +-lm_paths 4 diff --git a/tests/system_tests/t2s/gold.stdout b/tests/system_tests/t2s/gold.stdout new file mode 100644 index 00000000..afb11818 --- /dev/null +++ b/tests/system_tests/t2s/gold.stdout @@ -0,0 +1 @@ +qiangshou bei jingfang jibi . diff --git a/tests/system_tests/t2s/grammar.t2s b/tests/system_tests/t2s/grammar.t2s new file mode 100644 index 00000000..2e6cf68c --- /dev/null +++ b/tests/system_tests/t2s/grammar.t2s @@ -0,0 +1,8 @@ +(S [NP-C] [VP] (PUNC .)) ||| [1] [2] . ||| R1=1 +(NP-C (DT the) (NN gunman)) ||| qiangshou ||| R2=1 +(NP-C (DT the) [NN]) ||| [1] ||| R2a=1 +(NN gunman) ||| qiangshou ||| R2b=1 +(VP (VBD was) (VP-C [VBN] (PP (IN by) [NP-C]))) ||| bei [2] [1] ||| R3=1 +(NP-C (DT the) (NN police)) ||| jingfang ||| R4=1 +(VBN killed) ||| jibi ||| R5=1 +(VBN killed) ||| killed' ||| R6=1 diff --git a/tests/system_tests/t2s/input.txt b/tests/system_tests/t2s/input.txt new file mode 100644 index 00000000..b8fe314e --- /dev/null +++ b/tests/system_tests/t2s/input.txt @@ -0,0 +1 @@ +(S (NP-C (DT the) (NN gunman)) (VP (VBD was) (VP-C (VBN killed) (PP (IN by) (NP-C (DT the) (NN police))))) (PUNC .)) diff --git a/tests/system_tests/t2s/weights b/tests/system_tests/t2s/weights new file mode 100644 index 00000000..4980db45 --- /dev/null +++ b/tests/system_tests/t2s/weights @@ -0,0 +1,6 @@ +R1 1 +R2a 1 +R2b 1 +R3 1 +R5 1 +R4 1 -- cgit v1.2.3 From a3434180474e832eeea7d775893ce85244f8145c Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 17 Apr 2014 20:55:34 -0400 Subject: fix rescoring --- decoder/trule.cc | 6 ++++++ tests/system_tests/cfg_rescore/README | 4 ++++ tests/system_tests/cfg_rescore/cdec.ini | 2 ++ tests/system_tests/cfg_rescore/gold.statistics | 3 +++ tests/system_tests/cfg_rescore/gold.stdout | 4 ++++ tests/system_tests/cfg_rescore/input.cfg | 9 +++++++++ tests/system_tests/cfg_rescore/input.txt | 1 + tests/system_tests/cfg_rescore/weights | 3 +++ 8 files changed, 32 insertions(+) create mode 100644 tests/system_tests/cfg_rescore/README create mode 100644 tests/system_tests/cfg_rescore/cdec.ini create mode 100644 tests/system_tests/cfg_rescore/gold.statistics create mode 100644 tests/system_tests/cfg_rescore/gold.stdout create mode 100644 tests/system_tests/cfg_rescore/input.cfg create mode 100644 tests/system_tests/cfg_rescore/input.txt create mode 100644 tests/system_tests/cfg_rescore/weights (limited to 'tests/system_tests') diff --git a/decoder/trule.cc b/decoder/trule.cc index 1bd5425f..bee211d5 100644 --- a/decoder/trule.cc +++ b/decoder/trule.cc @@ -56,6 +56,12 @@ bool TRule::ReadFromString(const string& line, bool mono) { RuleLexer::ReadRule(line + '\n', assign_trule, mono, this); if (n_assigned > 1) cerr<<"\nWARNING: more than one rule parsed from multi-line string; kept last: "< input.txt diff --git a/tests/system_tests/cfg_rescore/cdec.ini b/tests/system_tests/cfg_rescore/cdec.ini new file mode 100644 index 00000000..1a913f2d --- /dev/null +++ b/tests/system_tests/cfg_rescore/cdec.ini @@ -0,0 +1,2 @@ +formalism=rescore +k_best=100 diff --git a/tests/system_tests/cfg_rescore/gold.statistics b/tests/system_tests/cfg_rescore/gold.statistics new file mode 100644 index 00000000..7b05e2d8 --- /dev/null +++ b/tests/system_tests/cfg_rescore/gold.statistics @@ -0,0 +1,3 @@ +-lm_nodes 8 +-lm_edges 10 +-lm_paths 4 diff --git a/tests/system_tests/cfg_rescore/gold.stdout b/tests/system_tests/cfg_rescore/gold.stdout new file mode 100644 index 00000000..ccf99263 --- /dev/null +++ b/tests/system_tests/cfg_rescore/gold.stdout @@ -0,0 +1,4 @@ +0 ||| the broccoli was eaten by John ||| Passive=1 Definite=1 ||| 2 +0 ||| John ate the broccoli ||| Active=1 Definite=1 ||| 1.1 +0 ||| broccoli was eaten by John ||| Passive=1 ||| 1 +0 ||| John ate broccoli ||| Active=1 ||| 0.1 diff --git a/tests/system_tests/cfg_rescore/input.cfg b/tests/system_tests/cfg_rescore/input.cfg new file mode 100644 index 00000000..0073cb7b --- /dev/null +++ b/tests/system_tests/cfg_rescore/input.cfg @@ -0,0 +1,9 @@ +[S] ||| [S1] +[S1] ||| [NP1] [VP] ||| Active=1 +[VP] ||| [V] [NP2] +[V] ||| ate +[VPSV] ||| was eaten +[S1] ||| [NP2] [VPSV] by [NP1] ||| Passive=1 +[NP1] ||| John +[NP2] ||| broccoli +[NP2] ||| the broccoli ||| Definite=1 diff --git a/tests/system_tests/cfg_rescore/input.txt b/tests/system_tests/cfg_rescore/input.txt new file mode 100644 index 00000000..71fc26bc --- /dev/null +++ b/tests/system_tests/cfg_rescore/input.txt @@ -0,0 +1 @@ +{"rules":[1,"[S] ||| [S1] ||| [1]",2,"[S1] ||| [NP1] [VP] ||| [1] [2] ||| Active=1",3,"[VP] ||| [V] [NP2] ||| [1] [2]",4,"[V] ||| ate ||| ate",5,"[VPSV] ||| was eaten ||| was eaten",6,"[S1] ||| [NP2] [VPSV] by [NP1] ||| [1] [2] by [3] ||| Passive=1",7,"[NP1] ||| John ||| John",8,"[NP2] ||| broccoli ||| broccoli",9,"[NP2] ||| the broccoli ||| the broccoli ||| Definite=1"],"features":["PhraseModel_0","PhraseModel_1","PhraseModel_2","PhraseModel_3","PhraseModel_4","PhraseModel_5","PhraseModel_6","PhraseModel_7","PhraseModel_8","PhraseModel_9","PhraseModel_10","PhraseModel_11","PhraseModel_12","PhraseModel_13","PhraseModel_14","PhraseModel_15","PhraseModel_16","PhraseModel_17","PhraseModel_18","PhraseModel_19","PhraseModel_20","PhraseModel_21","PhraseModel_22","PhraseModel_23","PhraseModel_24","PhraseModel_25","PhraseModel_26","PhraseModel_27","PhraseModel_28","PhraseModel_29","PhraseModel_30","PhraseModel_31","PhraseModel_32","PhraseModel_33","PhraseModel_34","PhraseModel_35","PhraseModel_36","PhraseModel_37","PhraseModel_38","PhraseModel_39","PhraseModel_40","PhraseModel_41","PhraseModel_42","PhraseModel_43","PhraseModel_44","PhraseModel_45","PhraseModel_46","PhraseModel_47","PhraseModel_48","PhraseModel_49","PhraseModel_50","PhraseModel_51","PhraseModel_52","PhraseModel_53","PhraseModel_54","PhraseModel_55","PhraseModel_56","PhraseModel_57","PhraseModel_58","PhraseModel_59","PhraseModel_60","PhraseModel_61","PhraseModel_62","PhraseModel_63","PhraseModel_64","PhraseModel_65","PhraseModel_66","PhraseModel_67","PhraseModel_68","PhraseModel_69","PhraseModel_70","PhraseModel_71","PhraseModel_72","PhraseModel_73","PhraseModel_74","PhraseModel_75","PhraseModel_76","PhraseModel_77","PhraseModel_78","PhraseModel_79","PhraseModel_80","PhraseModel_81","PhraseModel_82","PhraseModel_83","PhraseModel_84","PhraseModel_85","PhraseModel_86","PhraseModel_87","PhraseModel_88","PhraseModel_89","PhraseModel_90","PhraseModel_91","PhraseModel_92","PhraseModel_93","PhraseModel_94","PhraseModel_95","PhraseModel_96","PhraseModel_97","PhraseModel_98","PhraseModel_99","Active","Passive","Definite"],"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":7}],"node":{"in_edges":[0],"cat":"NP1","node_hash":"0000000000000007"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":4}],"node":{"in_edges":[1],"cat":"V","node_hash":"0000000000000004"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":8},{"tail":[],"spans":[-1,-1,-1,-1],"feats":[102,1],"rule":9}],"node":{"in_edges":[2,3],"cat":"NP2","node_hash":"0000000000000009"},"edges":[{"tail":[1,2],"spans":[-1,-1,-1,-1],"feats":[],"rule":3}],"node":{"in_edges":[4],"cat":"VP","node_hash":"0000000000000003"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":5}],"node":{"in_edges":[5],"cat":"VPSV","node_hash":"0000000000000005"},"edges":[{"tail":[0,3],"spans":[-1,-1,-1,-1],"feats":[100,1],"rule":2},{"tail":[2,4,0],"spans":[-1,-1,-1,-1],"feats":[101,1],"rule":6}],"node":{"in_edges":[6,7],"cat":"S1","node_hash":"0000000000000006"},"edges":[{"tail":[5],"spans":[-1,-1,-1,-1],"feats":[],"rule":1}],"node":{"in_edges":[8],"cat":"S","node_hash":"0000000000000001"}} diff --git a/tests/system_tests/cfg_rescore/weights b/tests/system_tests/cfg_rescore/weights new file mode 100644 index 00000000..bd3bb1af --- /dev/null +++ b/tests/system_tests/cfg_rescore/weights @@ -0,0 +1,3 @@ +Active 0.1 +Passive 1 +Definite 1 -- cgit v1.2.3 From 18a1d98f5bd60ea195a6c3aaf8feb740da752f7e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 25 Apr 2014 13:20:06 -0400 Subject: fix tree-to-string forest so it works with cube pruning assumptions --- decoder/tree2string_translator.cc | 17 ++++++++++++++++- tests/system_tests/t2s/gold.statistics | 4 ++-- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'tests/system_tests') diff --git a/decoder/tree2string_translator.cc b/decoder/tree2string_translator.cc index 29caaf8f..c353f7ca 100644 --- a/decoder/tree2string_translator.cc +++ b/decoder/tree2string_translator.cc @@ -126,6 +126,17 @@ namespace std { }; }; +void AddDummyGoalNode(Hypergraph* hg) { + static const int kGOAL = -TD::Convert("Goal"); + static TRulePtr kGOAL_RULE(new TRule("[Goal] ||| [X] ||| [1]")); + unsigned old_goal_node_idx = hg->nodes_.size() - 1; + HG::Node* goal_node = hg->AddNode(kGOAL); + goal_node->node_hash = 1; + TailNodeVector tail(1, old_goal_node_idx); + HG::Edge* new_edge = hg->AddEdge(kGOAL_RULE, tail); + hg->ConnectEdgeToHeadNode(new_edge, goal_node); +} + struct Tree2StringTranslatorImpl { vector> root; bool add_pass_through_rules; @@ -308,14 +319,18 @@ struct Tree2StringTranslatorImpl { if (goal_it == x2hg.end()) return false; //cerr << "Goal node: " << goal << endl; hg.TopologicallySortNodesAndEdges(goal_it->second); - hg.Reweight(weights); // there might be nodes that cannot be derived // the following takes care of them vector prune(hg.edges_.size(), false); hg.PruneEdges(prune, true); if (hg.edges_.size() == 0) return false; + // rescoring assumes the goal edge is arity 1 (code laziness), add that here + AddDummyGoalNode(&hg); + + hg.Reweight(weights); //hg.PrintGraphviz(); + minus_lm_forest->swap(hg); return true; } diff --git a/tests/system_tests/t2s/gold.statistics b/tests/system_tests/t2s/gold.statistics index 452cc93e..5778a24c 100644 --- a/tests/system_tests/t2s/gold.statistics +++ b/tests/system_tests/t2s/gold.statistics @@ -1,3 +1,3 @@ --lm_nodes 6 --lm_edges 8 +-lm_nodes 7 +-lm_edges 9 -lm_paths 4 -- cgit v1.2.3 From d033a045aa46ff876ad2c9f6929e2095b2481cdf Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 25 Apr 2014 23:45:32 -0400 Subject: check for non-rescorable hypergraphs --- decoder/decoder.cc | 5 +++++ decoder/hg.cc | 7 +++++++ decoder/hg.h | 4 ++++ tests/system_tests/cfg_rescore/input.cfg | 5 ++--- tests/system_tests/cfg_rescore/input.txt | 2 +- training/utils/grammar_convert.cc | 16 ++++++++++++---- 6 files changed, 31 insertions(+), 8 deletions(-) (limited to 'tests/system_tests') diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 354ea2d9..41f36822 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -755,6 +755,11 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (!SILENT) cerr << " *** NODES NOT UNIQUELY IDENTIFIED ***\n"; } + if (!forest.ArePreGoalEdgesArity1()) { + cerr << "Pre-goal edges are not arity-1. The decoder requires this.\n"; + abort(); + } + const bool show_tree_structure=conf.count("show_tree_structure"); if (!SILENT) forest_stats(forest," Init. forest",show_tree_structure,oracle.show_derivation); if (conf.count("show_expected_length")) { diff --git a/decoder/hg.cc b/decoder/hg.cc index e456fa7c..46543b01 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -28,6 +28,13 @@ bool Hypergraph::AreNodesUniquelyIdentified() const { return true; } +bool Hypergraph::ArePreGoalEdgesArity1() const { + auto& n = nodes_.back(); + for (auto eid : n.in_edges_) + if (edges_[eid].Arity() != 1) return false; + return true; +} + Hypergraph::Edge const* Hypergraph::ViterbiSortInEdges() { NodeProbs nv; ComputeNodeViterbi(&nv); diff --git a/decoder/hg.h b/decoder/hg.h index 43fb275b..4ed27d87 100644 --- a/decoder/hg.h +++ b/decoder/hg.h @@ -268,6 +268,10 @@ public: // if all node states are unique, return true bool AreNodesUniquelyIdentified() const; + // the feature function interface assumes that pre-goal edges are + // arity 1 (this simplifies the "final transition" feature computation) + bool ArePreGoalEdgesArity1() const; + // reserves space in the nodes vector to prevent memory locations // from changing void ReserveNodes(size_t n, size_t e = 0) { diff --git a/tests/system_tests/cfg_rescore/input.cfg b/tests/system_tests/cfg_rescore/input.cfg index 0073cb7b..75602c75 100644 --- a/tests/system_tests/cfg_rescore/input.cfg +++ b/tests/system_tests/cfg_rescore/input.cfg @@ -1,9 +1,8 @@ -[S] ||| [S1] -[S1] ||| [NP1] [VP] ||| Active=1 +[S] ||| [NP1] [VP] ||| Active=1 +[S] ||| [NP2] [VPSV] by [NP1] ||| Passive=1 [VP] ||| [V] [NP2] [V] ||| ate [VPSV] ||| was eaten -[S1] ||| [NP2] [VPSV] by [NP1] ||| Passive=1 [NP1] ||| John [NP2] ||| broccoli [NP2] ||| the broccoli ||| Definite=1 diff --git a/tests/system_tests/cfg_rescore/input.txt b/tests/system_tests/cfg_rescore/input.txt index 71fc26bc..2999a5fb 100644 --- a/tests/system_tests/cfg_rescore/input.txt +++ b/tests/system_tests/cfg_rescore/input.txt @@ -1 +1 @@ -{"rules":[1,"[S] ||| [S1] ||| [1]",2,"[S1] ||| [NP1] [VP] ||| [1] [2] ||| Active=1",3,"[VP] ||| [V] [NP2] ||| [1] [2]",4,"[V] ||| ate ||| ate",5,"[VPSV] ||| was eaten ||| was eaten",6,"[S1] ||| [NP2] [VPSV] by [NP1] ||| [1] [2] by [3] ||| Passive=1",7,"[NP1] ||| John ||| John",8,"[NP2] ||| broccoli ||| broccoli",9,"[NP2] ||| the broccoli ||| the broccoli ||| Definite=1"],"features":["PhraseModel_0","PhraseModel_1","PhraseModel_2","PhraseModel_3","PhraseModel_4","PhraseModel_5","PhraseModel_6","PhraseModel_7","PhraseModel_8","PhraseModel_9","PhraseModel_10","PhraseModel_11","PhraseModel_12","PhraseModel_13","PhraseModel_14","PhraseModel_15","PhraseModel_16","PhraseModel_17","PhraseModel_18","PhraseModel_19","PhraseModel_20","PhraseModel_21","PhraseModel_22","PhraseModel_23","PhraseModel_24","PhraseModel_25","PhraseModel_26","PhraseModel_27","PhraseModel_28","PhraseModel_29","PhraseModel_30","PhraseModel_31","PhraseModel_32","PhraseModel_33","PhraseModel_34","PhraseModel_35","PhraseModel_36","PhraseModel_37","PhraseModel_38","PhraseModel_39","PhraseModel_40","PhraseModel_41","PhraseModel_42","PhraseModel_43","PhraseModel_44","PhraseModel_45","PhraseModel_46","PhraseModel_47","PhraseModel_48","PhraseModel_49","PhraseModel_50","PhraseModel_51","PhraseModel_52","PhraseModel_53","PhraseModel_54","PhraseModel_55","PhraseModel_56","PhraseModel_57","PhraseModel_58","PhraseModel_59","PhraseModel_60","PhraseModel_61","PhraseModel_62","PhraseModel_63","PhraseModel_64","PhraseModel_65","PhraseModel_66","PhraseModel_67","PhraseModel_68","PhraseModel_69","PhraseModel_70","PhraseModel_71","PhraseModel_72","PhraseModel_73","PhraseModel_74","PhraseModel_75","PhraseModel_76","PhraseModel_77","PhraseModel_78","PhraseModel_79","PhraseModel_80","PhraseModel_81","PhraseModel_82","PhraseModel_83","PhraseModel_84","PhraseModel_85","PhraseModel_86","PhraseModel_87","PhraseModel_88","PhraseModel_89","PhraseModel_90","PhraseModel_91","PhraseModel_92","PhraseModel_93","PhraseModel_94","PhraseModel_95","PhraseModel_96","PhraseModel_97","PhraseModel_98","PhraseModel_99","Active","Passive","Definite"],"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":7}],"node":{"in_edges":[0],"cat":"NP1","node_hash":"0000000000000007"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":4}],"node":{"in_edges":[1],"cat":"V","node_hash":"0000000000000004"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":8},{"tail":[],"spans":[-1,-1,-1,-1],"feats":[102,1],"rule":9}],"node":{"in_edges":[2,3],"cat":"NP2","node_hash":"0000000000000009"},"edges":[{"tail":[1,2],"spans":[-1,-1,-1,-1],"feats":[],"rule":3}],"node":{"in_edges":[4],"cat":"VP","node_hash":"0000000000000003"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":5}],"node":{"in_edges":[5],"cat":"VPSV","node_hash":"0000000000000005"},"edges":[{"tail":[0,3],"spans":[-1,-1,-1,-1],"feats":[100,1],"rule":2},{"tail":[2,4,0],"spans":[-1,-1,-1,-1],"feats":[101,1],"rule":6}],"node":{"in_edges":[6,7],"cat":"S1","node_hash":"0000000000000006"},"edges":[{"tail":[5],"spans":[-1,-1,-1,-1],"feats":[],"rule":1}],"node":{"in_edges":[8],"cat":"S","node_hash":"0000000000000001"}} +{"rules":[1,"[S] ||| [NP1] [VP] ||| [1] [2] ||| Active=1",2,"[S] ||| [NP2] [VPSV] by [NP1] ||| [1] [2] by [3] ||| Passive=1",3,"[VP] ||| [V] [NP2] ||| [1] [2]",4,"[V] ||| ate ||| ate",5,"[VPSV] ||| was eaten ||| was eaten",6,"[NP1] ||| John ||| John",7,"[NP2] ||| broccoli ||| broccoli",8,"[NP2] ||| the broccoli ||| the broccoli ||| Definite=1",9,"[Goal] ||| [X] ||| [1]"],"features":["PhraseModel_0","PhraseModel_1","PhraseModel_2","PhraseModel_3","PhraseModel_4","PhraseModel_5","PhraseModel_6","PhraseModel_7","PhraseModel_8","PhraseModel_9","PhraseModel_10","PhraseModel_11","PhraseModel_12","PhraseModel_13","PhraseModel_14","PhraseModel_15","PhraseModel_16","PhraseModel_17","PhraseModel_18","PhraseModel_19","PhraseModel_20","PhraseModel_21","PhraseModel_22","PhraseModel_23","PhraseModel_24","PhraseModel_25","PhraseModel_26","PhraseModel_27","PhraseModel_28","PhraseModel_29","PhraseModel_30","PhraseModel_31","PhraseModel_32","PhraseModel_33","PhraseModel_34","PhraseModel_35","PhraseModel_36","PhraseModel_37","PhraseModel_38","PhraseModel_39","PhraseModel_40","PhraseModel_41","PhraseModel_42","PhraseModel_43","PhraseModel_44","PhraseModel_45","PhraseModel_46","PhraseModel_47","PhraseModel_48","PhraseModel_49","PhraseModel_50","PhraseModel_51","PhraseModel_52","PhraseModel_53","PhraseModel_54","PhraseModel_55","PhraseModel_56","PhraseModel_57","PhraseModel_58","PhraseModel_59","PhraseModel_60","PhraseModel_61","PhraseModel_62","PhraseModel_63","PhraseModel_64","PhraseModel_65","PhraseModel_66","PhraseModel_67","PhraseModel_68","PhraseModel_69","PhraseModel_70","PhraseModel_71","PhraseModel_72","PhraseModel_73","PhraseModel_74","PhraseModel_75","PhraseModel_76","PhraseModel_77","PhraseModel_78","PhraseModel_79","PhraseModel_80","PhraseModel_81","PhraseModel_82","PhraseModel_83","PhraseModel_84","PhraseModel_85","PhraseModel_86","PhraseModel_87","PhraseModel_88","PhraseModel_89","PhraseModel_90","PhraseModel_91","PhraseModel_92","PhraseModel_93","PhraseModel_94","PhraseModel_95","PhraseModel_96","PhraseModel_97","PhraseModel_98","PhraseModel_99","Active","Passive","Definite"],"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":6}],"node":{"in_edges":[0],"cat":"NP1","node_hash":"0000000000000006"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":4}],"node":{"in_edges":[1],"cat":"V","node_hash":"0000000000000004"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":7},{"tail":[],"spans":[-1,-1,-1,-1],"feats":[102,1],"rule":8}],"node":{"in_edges":[2,3],"cat":"NP2","node_hash":"0000000000000008"},"edges":[{"tail":[1,2],"spans":[-1,-1,-1,-1],"feats":[],"rule":3}],"node":{"in_edges":[4],"cat":"VP","node_hash":"0000000000000003"},"edges":[{"tail":[],"spans":[-1,-1,-1,-1],"feats":[],"rule":5}],"node":{"in_edges":[5],"cat":"VPSV","node_hash":"0000000000000005"},"edges":[{"tail":[0,3],"spans":[-1,-1,-1,-1],"feats":[100,1],"rule":1},{"tail":[2,4,0],"spans":[-1,-1,-1,-1],"feats":[101,1],"rule":2}],"node":{"in_edges":[6,7],"cat":"S","node_hash":"0000000000000002"},"edges":[{"tail":[5],"spans":[-1,-1,-1,-1],"feats":[],"rule":9}],"node":{"in_edges":[8],"cat":"Goal","node_hash":"000000000000003D"}} diff --git a/training/utils/grammar_convert.cc b/training/utils/grammar_convert.cc index 58d1957c..5c1b4d4a 100644 --- a/training/utils/grammar_convert.cc +++ b/training/utils/grammar_convert.cc @@ -56,15 +56,22 @@ int GetOrCreateNode(const WordID& lhs, map* lhs2node, Hypergraph* h return node_id - 1; } +void AddDummyGoalNode(Hypergraph* hg) { + static const int kGOAL = -TD::Convert("Goal"); + static TRulePtr kGOAL_RULE(new TRule("[Goal] ||| [X] ||| [1]")); + unsigned old_goal_node_idx = hg->nodes_.size() - 1; + HG::Node* goal_node = hg->AddNode(kGOAL); + goal_node->node_hash = goal_node->id_ * 10 + 1; + TailNodeVector tail(1, old_goal_node_idx); + HG::Edge* new_edge = hg->AddEdge(kGOAL_RULE, tail); + hg->ConnectEdgeToHeadNode(new_edge, goal_node); +} + void FilterAndCheckCorrectness(int goal, Hypergraph* hg) { if (goal < 0) { cerr << "Error! [S] not found in grammar!\n"; exit(1); } - if (hg->nodes_[goal].in_edges_.size() != 1) { - cerr << "Error! [S] has more than one rewrite!\n"; - exit(1); - } int old_size = hg->nodes_.size(); hg->TopologicallySortNodesAndEdges(goal); if (hg->nodes_.size() != old_size) { @@ -319,6 +326,7 @@ int main(int argc, char **argv) { if (line.empty()) { int goal = lhs2node[kSTART] - 1; FilterAndCheckCorrectness(goal, &hg); + AddDummyGoalNode(&hg); ProcessHypergraph(w, conf, "", &hg); hg.clear(); lhs2node.clear(); -- cgit v1.2.3