From e32e9fdd48ef6466fbb257d92e250816f5b69114 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 7 Apr 2014 00:54:52 -0400 Subject: clean up dead TRule code --- decoder/hg_test.cc | 60 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 20 deletions(-) (limited to 'decoder/hg_test.cc') diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 8519e559..95cfae51 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -18,8 +18,10 @@ using namespace std; BOOST_FIXTURE_TEST_SUITE( s, HGSetup ); BOOST_AUTO_TEST_CASE(Controlled) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); + cerr << "PATH: " << path << "/hg.tiny\n"; Hypergraph hg; - CreateHG_tiny(&hg); + CreateHG_tiny(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -37,10 +39,11 @@ BOOST_AUTO_TEST_CASE(Controlled) { } BOOST_AUTO_TEST_CASE(Union) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg1; Hypergraph hg2; - CreateHG_tiny(&hg1); - CreateHG(&hg2); + CreateHG_tiny(path, &hg1); + CreateHG(path, &hg2); SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 1.0); @@ -84,8 +87,9 @@ BOOST_AUTO_TEST_CASE(Union) { } BOOST_AUTO_TEST_CASE(ControlledKBest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); vector w(2); w[0]=0.4; w[1]=0.8; hg.Reweight(w); vector trans; @@ -107,10 +111,11 @@ BOOST_AUTO_TEST_CASE(ControlledKBest) { BOOST_AUTO_TEST_CASE(InsideScore) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); SparseVector wts; wts.set_value(FD::Convert("f1"), 1.0); Hypergraph hg; - CreateTinyLatticeHG(&hg); + CreateTinyLatticeHG(path, &hg); hg.Reweight(wts); vector trans; prob_t cost = ViterbiESentence(hg, &trans); @@ -130,10 +135,11 @@ BOOST_AUTO_TEST_CASE(InsideScore) { BOOST_AUTO_TEST_CASE(PruneInsideOutside) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); SparseVector wts; wts.set_value(FD::Convert("Feature_1"), 1.0); Hypergraph hg; - CreateLatticeHG(&hg); + CreateLatticeHG(path, &hg); hg.Reweight(wts); vector trans; prob_t cost = ViterbiESentence(hg, &trans); @@ -152,8 +158,9 @@ BOOST_AUTO_TEST_CASE(PruneInsideOutside) { } BOOST_AUTO_TEST_CASE(TestPruneEdges) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateLatticeHG(&hg); + CreateLatticeHG(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -166,8 +173,9 @@ BOOST_AUTO_TEST_CASE(TestPruneEdges) { } BOOST_AUTO_TEST_CASE(TestIntersect) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG_int(&hg); + CreateHG_int(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -192,8 +200,9 @@ BOOST_AUTO_TEST_CASE(TestIntersect) { } BOOST_AUTO_TEST_CASE(TestPrune2) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG_int(&hg); + CreateHG_int(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -207,8 +216,9 @@ BOOST_AUTO_TEST_CASE(TestPrune2) { } BOOST_AUTO_TEST_CASE(Sample) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateLatticeHG(&hg); + CreateLatticeHG(path, &hg); SparseVector wts; wts.set_value(FD::Convert("Feature_1"), 0.0); hg.Reweight(wts); @@ -220,6 +230,7 @@ BOOST_AUTO_TEST_CASE(Sample) { } BOOST_AUTO_TEST_CASE(PLF) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; string inplf = "((('haupt',-2.06655,1),('hauptgrund',-5.71033,2),),(('grund',-1.78709,1),),(('für\\'',0.1,1),),)"; HypergraphIO::ReadFromPLF(inplf, &hg); @@ -234,8 +245,9 @@ BOOST_AUTO_TEST_CASE(PLF) { } BOOST_AUTO_TEST_CASE(PushWeightsToGoal) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); vector w(2); w[0]=0.4; w[1]=0.8; hg.Reweight(w); vector trans; @@ -248,8 +260,9 @@ BOOST_AUTO_TEST_CASE(PushWeightsToGoal) { } BOOST_AUTO_TEST_CASE(TestSpecialKBest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHGBalanced(&hg); + CreateHGBalanced(path, &hg); vector w(1); w[0]=0; hg.Reweight(w); vector, prob_t> > list; @@ -264,8 +277,9 @@ BOOST_AUTO_TEST_CASE(TestSpecialKBest) { } BOOST_AUTO_TEST_CASE(TestGenericViterbi) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG_tiny(&hg); + CreateHG_tiny(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -279,8 +293,9 @@ BOOST_AUTO_TEST_CASE(TestGenericViterbi) { } BOOST_AUTO_TEST_CASE(TestGenericInside) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateTinyLatticeHG(&hg); + CreateTinyLatticeHG(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 1.0); hg.Reweight(wts); @@ -296,8 +311,9 @@ BOOST_AUTO_TEST_CASE(TestGenericInside) { } BOOST_AUTO_TEST_CASE(TestGenericInside2) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -322,8 +338,9 @@ BOOST_AUTO_TEST_CASE(TestGenericInside2) { } BOOST_AUTO_TEST_CASE(TestAddExpectations) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); + CreateHG(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 0.8); @@ -338,8 +355,8 @@ BOOST_AUTO_TEST_CASE(TestAddExpectations) { } BOOST_AUTO_TEST_CASE(Small) { - Hypergraph hg; std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); + Hypergraph hg; CreateSmallHG(&hg, path); SparseVector wts; wts.set_value(FD::Convert("Model_0"), -2.0); @@ -361,6 +378,7 @@ BOOST_AUTO_TEST_CASE(Small) { } BOOST_AUTO_TEST_CASE(JSONTest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); ostringstream os; JSONParser::WriteEscapedString("\"I don't know\", she said.", &os); BOOST_CHECK_EQUAL("\"\\\"I don't know\\\", she said.\"", os.str()); @@ -370,9 +388,10 @@ BOOST_AUTO_TEST_CASE(JSONTest) { } BOOST_AUTO_TEST_CASE(TestGenericKBest) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg; - CreateHG(&hg); - //CreateHGBalanced(&hg); + CreateHG(path, &hg); + //CreateHGBalanced(path, &hg); SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 1.0); @@ -392,8 +411,9 @@ BOOST_AUTO_TEST_CASE(TestGenericKBest) { } BOOST_AUTO_TEST_CASE(TestReadWriteHG) { + std::string path(boost::unit_test::framework::master_test_suite().argc == 2 ? boost::unit_test::framework::master_test_suite().argv[1] : TEST_DATA); Hypergraph hg,hg2; - CreateHG(&hg); + CreateHG(path, &hg); hg.edges_.front().j_ = 23; hg.edges_.back().prev_i_ = 99; ostringstream os; -- cgit v1.2.3 From 71c1f8b274e4f0e83252fe3c68fb45c5ec4069e6 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 8 Apr 2014 23:16:24 -0400 Subject: smarter union --- decoder/hg.cc | 1 + decoder/hg_test.cc | 17 +++++++++ decoder/hg_union.cc | 105 +++++++++++++++++++++++++++++++++++----------------- 3 files changed, 89 insertions(+), 34 deletions(-) (limited to 'decoder/hg_test.cc') diff --git a/decoder/hg.cc b/decoder/hg.cc index 405169c6..e456fa7c 100644 --- a/decoder/hg.cc +++ b/decoder/hg.cc @@ -396,6 +396,7 @@ void Hypergraph::PrintGraphviz() const { for (const auto& node : nodes_) { cerr << " " << node.id_ << "[label=\"" << (node.cat_ < 0 ? TD::Convert(node.cat_ * -1) : "") << " n=" << node.id_ + << " h=" << node.node_hash << "\"];\n"; } cerr << "}\n"; diff --git a/decoder/hg_test.cc b/decoder/hg_test.cc index 95cfae51..5cb8626a 100644 --- a/decoder/hg_test.cc +++ b/decoder/hg_test.cc @@ -44,6 +44,13 @@ BOOST_AUTO_TEST_CASE(Union) { Hypergraph hg2; CreateHG_tiny(path, &hg1); CreateHG(path, &hg2); + int nc = 0; + for (auto& node: hg1.nodes_) + node.node_hash = ++nc; + for (auto& node: hg2.nodes_) + node.node_hash = ++nc; + hg1.nodes_.back().node_hash = nc; + SparseVector wts; wts.set_value(FD::Convert("f1"), 0.4); wts.set_value(FD::Convert("f2"), 1.0); @@ -56,8 +63,11 @@ BOOST_AUTO_TEST_CASE(Union) { int l2 = ViterbiPathLength(hg2); cerr << c1 << "\t" << TD::GetString(t1) << endl; cerr << c2 << "\t" << TD::GetString(t2) << endl; + hg1.PrintGraphviz(); + hg2.PrintGraphviz(); HG::Union(hg2, &hg1); hg1.Reweight(wts); + hg1.PrintGraphviz(); c3 = ViterbiESentence(hg1, &t3); int l3 = ViterbiPathLength(hg1); cerr << c3 << "\t" << TD::GetString(t3) << endl; @@ -84,6 +94,13 @@ BOOST_AUTO_TEST_CASE(Union) { BOOST_CHECK_CLOSE(log(list[0].second), log(c4), 1e-4); BOOST_CHECK_EQUAL(list.size(), 6); BOOST_CHECK_CLOSE(log(list.back().second / list.front().second), -97.7, 1e-4); + hg1 = hg2; + BOOST_CHECK_EQUAL(hg1.nodes_.size(), hg2.nodes_.size()); + BOOST_CHECK_EQUAL(hg1.edges_.size(), hg2.edges_.size()); + HG::Union(hg1, &hg2); // this should be a no-op + BOOST_CHECK_EQUAL(hg1.nodes_.size(), hg2.nodes_.size()); + BOOST_CHECK_EQUAL(hg1.edges_.size(), hg2.edges_.size()); + cerr << "DONE UNION\n"; } BOOST_AUTO_TEST_CASE(ControlledKBest) { diff --git a/decoder/hg_union.cc b/decoder/hg_union.cc index 37082976..4899e716 100644 --- a/decoder/hg_union.cc +++ b/decoder/hg_union.cc @@ -1,56 +1,93 @@ #include "hg_union.h" +#ifndef HAVE_OLD_CPP +# include +#else +# include +namespace std { using std::tr1::unordered_set; } +#endif + #include "hg.h" +#include "sparse_vector.h" using namespace std; namespace HG { +static bool EdgesMatch(const HG::Edge& a, const Hypergraph& ahg, const HG::Edge& b, const Hypergraph& bhg) { + const unsigned arity = a.tail_nodes_.size(); + if (arity != b.tail_nodes_.size()) return false; + if (a.rule_->e() != b.rule_->e()) return false; + if (a.rule_->f() != b.rule_->f()) return false; + + for (unsigned i = 0; i < arity; ++i) + if (ahg.nodes_[a.tail_nodes_[i]].node_hash != bhg.nodes_[b.tail_nodes_[i]].node_hash) return false; + const SparseVector diff = a.feature_values_ - b.feature_values_; + for (auto& kv : diff) + if (fabs(kv.second) > 1e-6) return false; + return true; +} + void Union(const Hypergraph& in, Hypergraph* out) { if (&in == out) return; if (out->nodes_.empty()) { out->nodes_ = in.nodes_; out->edges_ = in.edges_; return; } - unsigned noff = out->nodes_.size(); - unsigned eoff = out->edges_.size(); - int ogoal = in.nodes_.size() - 1; - int cgoal = noff - 1; - // keep a single goal node, so add nodes.size - 1 - out->nodes_.resize(out->nodes_.size() + ogoal); - // add all edges - out->edges_.resize(out->edges_.size() + in.edges_.size()); - - for (int i = 0; i < ogoal; ++i) { - const Hypergraph::Node& on = in.nodes_[i]; - Hypergraph::Node& cn = out->nodes_[i + noff]; - cn.id_ = i + noff; - cn.in_edges_.resize(on.in_edges_.size()); - for (unsigned j = 0; j < on.in_edges_.size(); ++j) - cn.in_edges_[j] = on.in_edges_[j] + eoff; - - cn.out_edges_.resize(on.out_edges_.size()); - for (unsigned j = 0; j < on.out_edges_.size(); ++j) - cn.out_edges_[j] = on.out_edges_[j] + eoff; + if (!in.AreNodesUniquelyIdentified()) { + cerr << "Union: Nodes are not uniquely identified in input!\n"; + abort(); + } + if (!out->AreNodesUniquelyIdentified()) { + cerr << "Union: Nodes are not uniquely identified in output!\n"; + abort(); } + if (out->nodes_.back().node_hash != in.nodes_.back().node_hash) { + cerr << "Union: Goal nodes are mismatched!\n"; + abort(); + } + const int cgoal = out->nodes_.back().id_; - for (unsigned i = 0; i < in.edges_.size(); ++i) { - const Hypergraph::Edge& oe = in.edges_[i]; - Hypergraph::Edge& ce = out->edges_[i + eoff]; - ce.id_ = i + eoff; - ce.rule_ = oe.rule_; - ce.feature_values_ = oe.feature_values_; - if (oe.head_node_ == ogoal) { - ce.head_node_ = cgoal; - out->nodes_[cgoal].in_edges_.push_back(ce.id_); - } else { - ce.head_node_ = oe.head_node_ + noff; + unordered_map h2n; + for (const auto& node : out->nodes_) + h2n[node.node_hash] = node.id_; + for (const auto& node : in.nodes_) { + if (h2n.count(node.node_hash) == 0) { + HG::Node* new_node = out->AddNode(node.cat_); + new_node->node_hash = node.node_hash; + h2n[node.node_hash] = new_node->id_; } - ce.tail_nodes_.resize(oe.tail_nodes_.size()); - for (unsigned j = 0; j < oe.tail_nodes_.size(); ++j) - ce.tail_nodes_[j] = oe.tail_nodes_[j] + noff; } + for (const auto& in_node : in.nodes_) { + HG::Node& out_node = out->nodes_[h2n[in_node.node_hash]]; + for (const auto oeid : out_node.in_edges_) { + // TODO hash currently existing edges for quick check for duplication + } + for (const auto ieid : in_node.in_edges_) { + const HG::Edge& in_edge = in.edges_[ieid]; + // TODO: replace slow N^2 check with hashing + bool edge_exists = false; + for (const auto oeid : out_node.in_edges_) { + if (EdgesMatch(in_edge, in, out->edges_[oeid], *out)) { + edge_exists = true; + break; + } + } + if (!edge_exists) { + const unsigned arity = in_edge.tail_nodes_.size(); + TailNodeVector t(arity); + HG::Node& head = out->nodes_[h2n[in_node.node_hash]]; + for (unsigned i = 0; i < arity; ++i) + t[i] = h2n[in.nodes_[in_edge.tail_nodes_[i]].node_hash]; + HG::Edge* new_edge = out->AddEdge(in_edge, t); + out->ConnectEdgeToHeadNode(new_edge, &head); + //cerr << "Created: " << new_edge->rule_->AsString() << " [head=" << new_edge->head_node_ << "]\n"; + } //else { + // cerr << "Not created: " << in.edges_[ieid].rule_->AsString() << "\n"; + //} + } + } out->TopologicallySortNodesAndEdges(cgoal); } -- cgit v1.2.3