From df8ed8821287fc8172cace28e09c2cac9823bb8a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 16 Jul 2014 20:56:49 +0200 Subject: in-place topological sort --- fast/.gitignore | 3 + fast/Makefile | 8 +-- fast/hypergraph.cc | 147 ++++++++++++++++++++++--------------------------- fast/hypergraph.hh | 65 +++++++++++----------- fast/main.cc | 157 +++++++++++++++++++++++++++-------------------------- 5 files changed, 185 insertions(+), 195 deletions(-) create mode 100644 fast/.gitignore (limited to 'fast') diff --git a/fast/.gitignore b/fast/.gitignore new file mode 100644 index 0000000..80d28d5 --- /dev/null +++ b/fast/.gitignore @@ -0,0 +1,3 @@ +fast_weaver +hypergraph.o +msgpack-c/ diff --git a/fast/Makefile b/fast/Makefile index a453fd3..f09ab21 100644 --- a/fast/Makefile +++ b/fast/Makefile @@ -1,12 +1,12 @@ all: hypergraph.o main.cc - clang -std=c++11 -lstdc++ main.cc hypergraph.o json/libjson.a -o fast_weaver + clang -std=c++11 -lstdc++ -lm hypergraph.o -I./msgpack-c/include/ main.cc -o fast_weaver hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh - clang -std=c++11 -lstdc++ -c hypergraph.cc -I./msgpack-c/include/ grammar.o ./msgpack-c/lib/libmsgpack.a + clang -std=c++11 -I./msgpack-c/include/ -c hypergraph.cc grammar.o: grammar.cc grammar.hh - clang -std=c++11 -lstdc++ -c grammar.cc + clang -std=c++11 -c grammar.cc clean: - rm fast_weaver hypergraph.o grammar.o + rm -f fast_weaver hypergraph.o grammar.o diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc index 08b4d05..15b0a1f 100644 --- a/fast/hypergraph.cc +++ b/fast/hypergraph.cc @@ -5,126 +5,111 @@ namespace Hg { /* * Node - * methods * */ -string -Node::s() +bool +Node::is_marked() +{ + return mark >= incoming.size(); +} + +std::ostream& +operator<<(std::ostream& os, const Node& n) { - ostringstream os; os << \ - "Nodeid << \ - ", symbol=" << this->symbol << \ - ", span=(" << this->left << "," << this->right << ")" \ - ", outgoing:" << this->outgoing.size() << \ - ", incoming:" << this->incoming.size() << ">"; - return os.str(); + "Node"; } /* - * Hyperedge - * methods + * Edge * */ -unsigned int -Hyperedge::arity() -{ - return this->arity_; -} - bool -Hyperedge::is_marked() +Edge::is_marked() { - return (this->arity_ == this->mark); + return mark >= arity; } -string -Hyperedge::s() +std::ostream& +operator<<(std::ostream& os, const Edge& e) { - ostringstream os; - vector tails_ids; ostringstream _; - for_each(this->tails.begin(), this->tails.end(), [&_](Node* n) { _ << n->id; _ << ","; }); + for (auto it = e.tails.begin(); it != e.tails.end(); ++it) { + _ << (*it)->id; if (*it != e.tails.back()) _ << ","; + } os << \ - "Hyperedgehead->id << \ + "Edgeid << \ + "', tails=[" << _.str() << "]" \ + ", score=" << e.score << \ ", rule:'" << "TODO" << \ - "', tails=" << _.str() << \ - ", arity=" << this->arity() << \ - ", score=" << this->score << \ " , f=" << "TODO" << \ - ", mark=" << this->mark << ">"; - return os.str(); + ", arity=" << e.arity << \ + ", mark=" << e.mark << ">"; + return os; } - /* * Hypergraph * methods * */ -unsigned int -Hypergraph::arity() -{ - // TODO - return 0; -} - void Hypergraph::reset() { } -string -Hypergraph::s() -{ - // TODO - return ""; -} - -string -Hypergraph::json_s() -{ - // TODO - return ""; -} - /* * functions * */ -vector -topological_sort(vector& nodes) +void +topological_sort(list& nodes, list::iterator root) { - vector sorted; - vector tmp; - for_each(nodes.begin(), nodes.end(), [&tmp](Node* n) -> void { if (n->incoming.size()==0) tmp.push_back(n); }); - while (!tmp.empty()) { - Node* n = tmp.front(); - cout << n->id << endl; - tmp.erase(tmp.begin()); - sorted.push_back(n); - for (auto it = n->outgoing.begin(); it != n->outgoing.end(); it++) { // edges - if ((*it)->is_marked()) { - continue; - } - (*it)->mark++; - bool add = true; - for (auto jt = (*it)->head->incoming.begin(); jt != (*it)->head->incoming.end(); jt++) { - if (!(*jt)->is_marked()) { - add = false; - break; + auto p = root; + (**p).mark = 0; // is_marked()==true + auto to = nodes.begin(); + while (to != nodes.end()) { + + for (auto it = nodes.begin(); it != nodes.end(); ++it) cout << (**it).id << " "; + cout << endl; + + if ((**p).is_marked()) { + // explore edges + for (auto e = (**p).outgoing.begin(); e!=(**p).outgoing.end(); ++e) { + (**e).mark++; + if ((**e).is_marked()) { + (**e).head->mark++; } } - if (add==true) { - tmp.push_back( (*it)->head ); - cout << " " << (*it)->head->id<< endl; - } + } + if ((**p).is_marked()) { + nodes.splice(to, nodes, p); + to = next(p); + p = to; + } else { + ++p; + /*if (p == nodes.end()) { + for (auto e = (**to).outgoing.begin(); e!=(**to).outgoing.end(); ++e) { + // explore edges + (**e).mark++; + if ((**e).is_marked()) { + (**e).head->mark++; + } + } + to++; + p = to; + }*/ } } - return sorted; } -void +/*void init(vector& nodes, ViterbiSemiring& semiring, Node* root) { for (auto it = nodes.begin(); it != nodes.end(); ++it) @@ -139,7 +124,7 @@ viterbi(vector& nodes, map nodes_by_id, Node* ro ViterbiSemiring semiring; init(sorted, semiring, root); - + for (auto n_it = sorted.begin(); n_it != sorted.end(); ++n_it) { for (auto e_it = (*n_it)->incoming.begin(); e_it != (*n_it)->incoming.end(); ++e_it) { cout << (*e_it)->s() << endl; @@ -154,7 +139,7 @@ viterbi(vector& nodes, map nodes_by_id, Node* ro for (auto it = sorted.begin(); it != sorted.end(); ++it) { cout << (*it)->id << " " << (*it)->score << endl; } -} +}*/ } // namespace diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh index e5f91cb..68cca19 100644 --- a/fast/hypergraph.hh +++ b/fast/hypergraph.hh @@ -7,75 +7,74 @@ #include #include #include -#include +#include +#include #include #include +#include -#include +#include "msgpack-c/include/msgpack.hpp" using namespace std; typedef double score_t; typedef double weight_t; -typedef size_t id_t; namespace Hg { - class Node; class Edge { public: - id_t head; - vector tails; + Node* head; + vector tails; score_t score; + //Grammar::Rule rule; FIXME vector f; + unsigned int arity; unsigned int mark; - //Grammar::Rule rule; FIXME - unsigned int arity_; - unsigned int arity(); bool is_marked(); - string s(); + friend std::ostream& operator<<(std::ostream& os, const Edge& s); - MSGPACK_DEFINE(head, tails, score, f, mark, arity_); + size_t head_id_; + vector tails_ids_; // node ids + MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity); }; - class Node { public: - unsigned int id; - string symbol; - unsigned int left; - unsigned int right; - score_t score; - vector outgoing; - vector incoming; + size_t id; + string symbol; + unsigned short left; + unsigned short right; + score_t score; + vector incoming; + vector outgoing; + unsigned int mark; - string s(); + bool is_marked(); + friend std::ostream& operator<<(std::ostream& os, const Node& n); - MSGPACK_DEFINE(id, symbol, left, right, score, outgoing, incoming); + vector incoming_ids_; // edge ids + vector outgoing_ids_; // edge ids + MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_); }; - - class Hypergraph { public: - vector nodes; - vector edges; - unsigned int arity_; + list nodes; + vector edges; + unordered_map nodes_by_id; + unsigned int arity; - unsigned int arity(); void reset(); - string s(); - string json_s(); - - MSGPACK_DEFINE(nodes, edges, arity_, nodes_by_id); + void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; } }; -vector topological_sort(vector& nodes); -void viterbi(vector& nodes, map nodes_by_id, Node* root); +void topological_sort(list& nodes, list::iterator root); +void viterbi(Hypergraph& hg); } // namespace diff --git a/fast/main.cc b/fast/main.cc index c951cd8..372f0f1 100644 --- a/fast/main.cc +++ b/fast/main.cc @@ -34,105 +34,108 @@ main(void) ] } */ + Hg::Hypergraph hg; // nodes - Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false; - Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1; - Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2; - Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4; - Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5; - Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5; - Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5; - Hg::Node h; h.id = 7; h.symbol = "S"; g.left = 0; g.right = 5; - - vector nodes; - nodes.push_back(&a); - nodes.push_back(&b); - nodes.push_back(&c); - nodes.push_back(&d); - nodes.push_back(&e); - nodes.push_back(&f); - nodes.push_back(&g); - nodes.push_back(&h); - - // nodes_by_id - map nodes_by_id; - nodes_by_id[0] = &a; - nodes_by_id[1] = &b; - nodes_by_id[2] = &c; - nodes_by_id[3] = &d; - nodes_by_id[4] = &e; - nodes_by_id[5] = &f; - nodes_by_id[6] = &g; - nodes_by_id[7] = &h; + Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false; a.mark = 0; + Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1; b.mark = 0; + Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2; c.mark = 0; + Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4; d.mark = 0; + Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5; e.mark = 0; + Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5; f.mark = 0; + Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5; g.mark = 0; + Hg::Node h; h.id = 7; h.symbol = "S"; h.left = 0; h.right = 6; h.mark = 0; + + hg.add_node(&a); + hg.add_node(&h); + hg.add_node(&g); + hg.add_node(&c); + hg.add_node(&d); + hg.add_node(&f); + hg.add_node(&b); + hg.add_node(&e); // edges - Hg::Hyperedge q; q.head = nodes_by_id[1]; q.tails.push_back(nodes_by_id[0]); q.score = 0.367879441171; - nodes_by_id[1]->incoming.push_back(&q); - nodes_by_id[0]->outgoing.push_back(&q); - q.arity_ = 1; + Hg::Edge q; q.head = hg.nodes_by_id[1]; q.tails.push_back(hg.nodes_by_id[0]); q.score = 0.367879441171; + hg.nodes_by_id[1]->incoming.push_back(&q); + hg.nodes_by_id[0]->outgoing.push_back(&q); + q.arity = 1; q.mark = 0; + hg.edges.push_back(&q); - Hg::Hyperedge p; p.head = nodes_by_id[2]; p.tails.push_back(nodes_by_id[0]); p.score = 0.606530659713; - nodes_by_id[2]->incoming.push_back(&p); - nodes_by_id[0]->outgoing.push_back(&p); - p.arity_ = 1; + Hg::Edge p; p.head = hg.nodes_by_id[2]; p.tails.push_back(hg.nodes_by_id[0]); p.score = 0.606530659713; + hg.nodes_by_id[2]->incoming.push_back(&p); + hg.nodes_by_id[0]->outgoing.push_back(&p); + p.arity = 1; p.mark = 0; + hg.edges.push_back(&p); - Hg::Hyperedge r; r.head = nodes_by_id[3]; r.tails.push_back(nodes_by_id[0]); r.score = 1.0; - nodes_by_id[3]->incoming.push_back(&r); - nodes_by_id[0]->outgoing.push_back(&r); - r.arity_ = 1; + Hg::Edge r; r.head = hg.nodes_by_id[3]; r.tails.push_back(hg.nodes_by_id[0]); r.score = 1.0; + hg.nodes_by_id[3]->incoming.push_back(&r); + hg.nodes_by_id[0]->outgoing.push_back(&r); + r.arity = 1; r.mark = 0; - Hg::Hyperedge s; s.head = nodes_by_id[3]; s.tails.push_back(nodes_by_id[0]); s.score = 1.0; - nodes_by_id[3]->incoming.push_back(&s); - nodes_by_id[0]->outgoing.push_back(&s); - s.arity_ = 1; + hg.edges.push_back(&r); + + Hg::Edge s; s.head = hg.nodes_by_id[3]; s.tails.push_back(hg.nodes_by_id[0]); s.score = 1.0; + hg.nodes_by_id[3]->incoming.push_back(&s); + hg.nodes_by_id[0]->outgoing.push_back(&s); + s.arity = 1; s.mark = 0; + hg.edges.push_back(&s); - Hg::Hyperedge t; t.head = nodes_by_id[4]; t.tails.push_back(nodes_by_id[0]); t.score = 1.0; - nodes_by_id[4]->incoming.push_back(&t); - nodes_by_id[0]->outgoing.push_back(&t); - t.arity_ = 1; + Hg::Edge t; t.head = hg.nodes_by_id[4]; t.tails.push_back(hg.nodes_by_id[0]); t.score = 1.0; + hg.nodes_by_id[4]->incoming.push_back(&t); + hg.nodes_by_id[0]->outgoing.push_back(&t); + t.arity = 1; t.mark = 0; - Hg::Hyperedge u; u.head = nodes_by_id[4]; u.tails.push_back(nodes_by_id[0]); u.score = 1.0; - nodes_by_id[4]->incoming.push_back(&u); - nodes_by_id[0]->outgoing.push_back(&u); - u.arity_ = 1; + hg.edges.push_back(&t); + + Hg::Edge u; u.head = hg.nodes_by_id[4]; u.tails.push_back(hg.nodes_by_id[0]); u.score = 1.0; + hg.nodes_by_id[4]->incoming.push_back(&u); + hg.nodes_by_id[0]->outgoing.push_back(&u); + u.arity = 1; u.mark = 0; + hg.edges.push_back(&u); - Hg::Hyperedge v; v.head = nodes_by_id[4]; v.tails.push_back(nodes_by_id[3]); v.score = 1.0; - nodes_by_id[4]->incoming.push_back(&v); - nodes_by_id[3]->outgoing.push_back(&v); - v.arity_ = 1; + Hg::Edge v; v.head = hg.nodes_by_id[4]; v.tails.push_back(hg.nodes_by_id[3]); v.score = 1.0; + hg.nodes_by_id[4]->incoming.push_back(&v); + hg.nodes_by_id[3]->outgoing.push_back(&v); + v.arity = 1; v.mark = 0; - Hg::Hyperedge w; w.head = nodes_by_id[4]; w.tails.push_back(nodes_by_id[3]); w.score = 2.71828182846; - nodes_by_id[4]->incoming.push_back(&w); - nodes_by_id[3]->outgoing.push_back(&w); - w.arity_ = 1; + hg.edges.push_back(&v); + + Hg::Edge w; w.head = hg.nodes_by_id[4]; w.tails.push_back(hg.nodes_by_id[3]); w.score = 2.71828182846; + hg.nodes_by_id[4]->incoming.push_back(&w); + hg.nodes_by_id[3]->outgoing.push_back(&w); + w.arity = 1; w.mark = 0; + hg.edges.push_back(&w); - Hg::Hyperedge x; x.head = nodes_by_id[5]; x.tails.push_back(nodes_by_id[4]); x.score = 1.0; - nodes_by_id[5]->incoming.push_back(&x); - nodes_by_id[4]->outgoing.push_back(&x); - x.arity_ = 1; + Hg::Edge x; x.head = hg.nodes_by_id[5]; x.tails.push_back(hg.nodes_by_id[4]); x.score = 1.0; + hg.nodes_by_id[5]->incoming.push_back(&x); + hg.nodes_by_id[4]->outgoing.push_back(&x); + x.arity = 1; x.mark = 0; + hg.edges.push_back(&x); - Hg::Hyperedge y; y.head = nodes_by_id[6]; y.tails.push_back(nodes_by_id[2]); y.tails.push_back(nodes_by_id[5]); y.score = 1.0; - nodes_by_id[6]->incoming.push_back(&y); - nodes_by_id[2]->outgoing.push_back(&y); - nodes_by_id[5]->outgoing.push_back(&y); - y.arity_ = 2; + Hg::Edge y; y.head = hg.nodes_by_id[6]; y.tails.push_back(hg.nodes_by_id[2]); y.tails.push_back(hg.nodes_by_id[5]); y.score = 1.0; + hg.nodes_by_id[6]->incoming.push_back(&y); + hg.nodes_by_id[2]->outgoing.push_back(&y); + hg.nodes_by_id[5]->outgoing.push_back(&y); + y.arity = 2; y.mark = 0; + hg.edges.push_back(&y); - Hg::Hyperedge z; z.head = nodes_by_id[7]; z.tails.push_back(nodes_by_id[1]); z.tails.push_back(nodes_by_id[6]); z.score = 1.0; - nodes_by_id[7]->incoming.push_back(&z); - nodes_by_id[1]->outgoing.push_back(&z); - nodes_by_id[6]->outgoing.push_back(&z); - z.arity_ = 2; + Hg::Edge z; z.head = hg.nodes_by_id[7]; z.tails.push_back(hg.nodes_by_id[1]); z.tails.push_back(hg.nodes_by_id[6]); z.score = 1.0; + hg.nodes_by_id[7]->incoming.push_back(&z); + hg.nodes_by_id[1]->outgoing.push_back(&z); + hg.nodes_by_id[6]->outgoing.push_back(&z); + z.arity = 2; z.mark = 0; + hg.edges.push_back(&z); - // test - Hg::viterbi(nodes, nodes_by_id, nodes_by_id[0]); + Hg::topological_sort(hg.nodes, hg.nodes.begin()); + //Hg::viterbi(nodes, hg.nodes, hg.nodes_by_id(0]); } -- cgit v1.2.3