summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-07-16 20:56:49 +0200
committerPatrick Simianer <p@simianer.de>2014-07-16 20:56:49 +0200
commitdf8ed8821287fc8172cace28e09c2cac9823bb8a (patch)
tree532d3bb0c01479da753c2e6cf871365a1f585ea7
parenta7ca2c4f4cd4f6f88c805bf89d9ee6ac55941a89 (diff)
in-place topological sort
-rw-r--r--fast/.gitignore3
-rw-r--r--fast/Makefile8
-rw-r--r--fast/hypergraph.cc147
-rw-r--r--fast/hypergraph.hh65
-rw-r--r--fast/main.cc157
5 files changed, 185 insertions, 195 deletions
diff --git a/fast/.gitignore b/fast/.gitignore
new file mode 100644
index 0000000..80d28d5
--- /dev/null
+++ b/fast/.gitignore
@@ -0,0 +1,3 @@
+fast_weaver
+hypergraph.o
+msgpack-c/
diff --git a/fast/Makefile b/fast/Makefile
index a453fd3..f09ab21 100644
--- a/fast/Makefile
+++ b/fast/Makefile
@@ -1,12 +1,12 @@
all: hypergraph.o main.cc
- clang -std=c++11 -lstdc++ main.cc hypergraph.o json/libjson.a -o fast_weaver
+ clang -std=c++11 -lstdc++ -lm hypergraph.o -I./msgpack-c/include/ main.cc -o fast_weaver
hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh
- clang -std=c++11 -lstdc++ -c hypergraph.cc -I./msgpack-c/include/ grammar.o ./msgpack-c/lib/libmsgpack.a
+ clang -std=c++11 -I./msgpack-c/include/ -c hypergraph.cc
grammar.o: grammar.cc grammar.hh
- clang -std=c++11 -lstdc++ -c grammar.cc
+ clang -std=c++11 -c grammar.cc
clean:
- rm fast_weaver hypergraph.o grammar.o
+ rm -f fast_weaver hypergraph.o grammar.o
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index 08b4d05..15b0a1f 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -5,126 +5,111 @@ namespace Hg {
/*
* Node
- * methods
*
*/
-string
-Node::s()
+bool
+Node::is_marked()
+{
+ return mark >= incoming.size();
+}
+
+std::ostream&
+operator<<(std::ostream& os, const Node& n)
{
- ostringstream os;
os << \
- "Node<id=" << this->id << \
- ", symbol=" << this->symbol << \
- ", span=(" << this->left << "," << this->right << ")" \
- ", outgoing:" << this->outgoing.size() << \
- ", incoming:" << this->incoming.size() << ">";
- return os.str();
+ "Node<id=" << n.id << \
+ ", symbol=" << n.symbol << \
+ ", span=(" << n.left << "," << n.right << ")" \
+ ", score=" << n.score << \
+ ", incoming:" << n.incoming.size() << \
+ ", outgoing:" << n.outgoing.size() << \
+ ", mark=" << n.mark << ">";
}
/*
- * Hyperedge
- * methods
+ * Edge
*
*/
-unsigned int
-Hyperedge::arity()
-{
- return this->arity_;
-}
-
bool
-Hyperedge::is_marked()
+Edge::is_marked()
{
- return (this->arity_ == this->mark);
+ return mark >= arity;
}
-string
-Hyperedge::s()
+std::ostream&
+operator<<(std::ostream& os, const Edge& e)
{
- ostringstream os;
- vector<unsigned int> tails_ids;
ostringstream _;
- for_each(this->tails.begin(), this->tails.end(), [&_](Node* n) { _ << n->id; _ << ","; });
+ for (auto it = e.tails.begin(); it != e.tails.end(); ++it) {
+ _ << (*it)->id; if (*it != e.tails.back()) _ << ",";
+ }
os << \
- "Hyperedge<head=" << this->head->id << \
+ "Edge<head=" << e.head->id << \
+ "', tails=[" << _.str() << "]" \
+ ", score=" << e.score << \
", rule:'" << "TODO" << \
- "', tails=" << _.str() << \
- ", arity=" << this->arity() << \
- ", score=" << this->score << \
" , f=" << "TODO" << \
- ", mark=" << this->mark << ">";
- return os.str();
+ ", arity=" << e.arity << \
+ ", mark=" << e.mark << ">";
+ return os;
}
-
/*
* Hypergraph
* methods
*
*/
-unsigned int
-Hypergraph::arity()
-{
- // TODO
- return 0;
-}
-
void
Hypergraph::reset()
{
}
-string
-Hypergraph::s()
-{
- // TODO
- return "";
-}
-
-string
-Hypergraph::json_s()
-{
- // TODO
- return "";
-}
-
/*
* functions
*
*/
-vector<Node*>
-topological_sort(vector<Node*>& nodes)
+void
+topological_sort(list<Node*>& nodes, list<Node*>::iterator root)
{
- vector<Node*> sorted;
- vector<Node*> tmp;
- for_each(nodes.begin(), nodes.end(), [&tmp](Node* n) -> void { if (n->incoming.size()==0) tmp.push_back(n); });
- while (!tmp.empty()) {
- Node* n = tmp.front();
- cout << n->id << endl;
- tmp.erase(tmp.begin());
- sorted.push_back(n);
- for (auto it = n->outgoing.begin(); it != n->outgoing.end(); it++) { // edges
- if ((*it)->is_marked()) {
- continue;
- }
- (*it)->mark++;
- bool add = true;
- for (auto jt = (*it)->head->incoming.begin(); jt != (*it)->head->incoming.end(); jt++) {
- if (!(*jt)->is_marked()) {
- add = false;
- break;
+ auto p = root;
+ (**p).mark = 0; // is_marked()==true
+ auto to = nodes.begin();
+ while (to != nodes.end()) {
+
+ for (auto it = nodes.begin(); it != nodes.end(); ++it) cout << (**it).id << " ";
+ cout << endl;
+
+ if ((**p).is_marked()) {
+ // explore edges
+ for (auto e = (**p).outgoing.begin(); e!=(**p).outgoing.end(); ++e) {
+ (**e).mark++;
+ if ((**e).is_marked()) {
+ (**e).head->mark++;
}
}
- if (add==true) {
- tmp.push_back( (*it)->head );
- cout << " " << (*it)->head->id<< endl;
- }
+ }
+ if ((**p).is_marked()) {
+ nodes.splice(to, nodes, p);
+ to = next(p);
+ p = to;
+ } else {
+ ++p;
+ /*if (p == nodes.end()) {
+ for (auto e = (**to).outgoing.begin(); e!=(**to).outgoing.end(); ++e) {
+ // explore edges
+ (**e).mark++;
+ if ((**e).is_marked()) {
+ (**e).head->mark++;
+ }
+ }
+ to++;
+ p = to;
+ }*/
}
}
- return sorted;
}
-void
+/*void
init(vector<Node*>& nodes, ViterbiSemiring<double>& semiring, Node* root)
{
for (auto it = nodes.begin(); it != nodes.end(); ++it)
@@ -139,7 +124,7 @@ viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* ro
ViterbiSemiring<double> semiring;
init(sorted, semiring, root);
-
+
for (auto n_it = sorted.begin(); n_it != sorted.end(); ++n_it) {
for (auto e_it = (*n_it)->incoming.begin(); e_it != (*n_it)->incoming.end(); ++e_it) {
cout << (*e_it)->s() << endl;
@@ -154,7 +139,7 @@ viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* ro
for (auto it = sorted.begin(); it != sorted.end(); ++it) {
cout << (*it)->id << " " << (*it)->score << endl;
}
-}
+}*/
} // namespace
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
index e5f91cb..68cca19 100644
--- a/fast/hypergraph.hh
+++ b/fast/hypergraph.hh
@@ -7,75 +7,74 @@
#include <string>
#include <sstream>
#include <vector>
-#include <map>
+#include <list>
+#include <unordered_map>
#include <functional>
#include <algorithm>
+#include <iterator>
-#include <msgpack.hpp>
+#include "msgpack-c/include/msgpack.hpp"
using namespace std;
typedef double score_t;
typedef double weight_t;
-typedef size_t id_t;
namespace Hg {
-
class Node;
class Edge {
public:
- id_t head;
- vector<id_t> tails;
+ Node* head;
+ vector<Node*> tails;
score_t score;
+ //Grammar::Rule rule; FIXME
vector<weight_t> f;
+ unsigned int arity;
unsigned int mark;
- //Grammar::Rule rule; FIXME
- unsigned int arity_;
- unsigned int arity();
bool is_marked();
- string s();
+ friend std::ostream& operator<<(std::ostream& os, const Edge& s);
- MSGPACK_DEFINE(head, tails, score, f, mark, arity_);
+ size_t head_id_;
+ vector<size_t> tails_ids_; // node ids
+ MSGPACK_DEFINE(head_id_, tails_ids_, score, f, arity);
};
-
class Node {
public:
- unsigned int id;
- string symbol;
- unsigned int left;
- unsigned int right;
- score_t score;
- vector<id_t> outgoing;
- vector<id_t> incoming;
+ size_t id;
+ string symbol;
+ unsigned short left;
+ unsigned short right;
+ score_t score;
+ vector<Edge*> incoming;
+ vector<Edge*> outgoing;
+ unsigned int mark;
- string s();
+ bool is_marked();
+ friend std::ostream& operator<<(std::ostream& os, const Node& n);
- MSGPACK_DEFINE(id, symbol, left, right, score, outgoing, incoming);
+ vector<size_t> incoming_ids_; // edge ids
+ vector<size_t> outgoing_ids_; // edge ids
+ MSGPACK_DEFINE(id, symbol, left, right, score, incoming_ids_, outgoing_ids_);
};
-
-
class Hypergraph {
public:
- vector<Node> nodes;
- vector<Edge> edges;
- unsigned int arity_;
+ list<Node*> nodes;
+ vector<Edge*> edges;
+ unordered_map<size_t, Node*> nodes_by_id;
+ unsigned int arity;
- unsigned int arity();
void reset();
- string s();
- string json_s();
-
- MSGPACK_DEFINE(nodes, edges, arity_, nodes_by_id);
+ void add_node(Node* n) { nodes.push_back(n); nodes_by_id[n->id] = n; }
};
-vector<Node*> topological_sort(vector<Node*>& nodes);
-void viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* root);
+void topological_sort(list<Node*>& nodes, list<Node*>::iterator root);
+void viterbi(Hypergraph& hg);
} // namespace
diff --git a/fast/main.cc b/fast/main.cc
index c951cd8..372f0f1 100644
--- a/fast/main.cc
+++ b/fast/main.cc
@@ -34,105 +34,108 @@ main(void)
]
}
*/
+ Hg::Hypergraph hg;
// nodes
- Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false;
- Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1;
- Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2;
- Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4;
- Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5;
- Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5;
- Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5;
- Hg::Node h; h.id = 7; h.symbol = "S"; g.left = 0; g.right = 5;
-
- vector<Hg::Node*> nodes;
- nodes.push_back(&a);
- nodes.push_back(&b);
- nodes.push_back(&c);
- nodes.push_back(&d);
- nodes.push_back(&e);
- nodes.push_back(&f);
- nodes.push_back(&g);
- nodes.push_back(&h);
-
- // nodes_by_id
- map<unsigned int, Hg::Node*> nodes_by_id;
- nodes_by_id[0] = &a;
- nodes_by_id[1] = &b;
- nodes_by_id[2] = &c;
- nodes_by_id[3] = &d;
- nodes_by_id[4] = &e;
- nodes_by_id[5] = &f;
- nodes_by_id[6] = &g;
- nodes_by_id[7] = &h;
+ Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false; a.mark = 0;
+ Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1; b.mark = 0;
+ Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2; c.mark = 0;
+ Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4; d.mark = 0;
+ Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5; e.mark = 0;
+ Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5; f.mark = 0;
+ Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5; g.mark = 0;
+ Hg::Node h; h.id = 7; h.symbol = "S"; h.left = 0; h.right = 6; h.mark = 0;
+
+ hg.add_node(&a);
+ hg.add_node(&h);
+ hg.add_node(&g);
+ hg.add_node(&c);
+ hg.add_node(&d);
+ hg.add_node(&f);
+ hg.add_node(&b);
+ hg.add_node(&e);
// edges
- Hg::Hyperedge q; q.head = nodes_by_id[1]; q.tails.push_back(nodes_by_id[0]); q.score = 0.367879441171;
- nodes_by_id[1]->incoming.push_back(&q);
- nodes_by_id[0]->outgoing.push_back(&q);
- q.arity_ = 1;
+ Hg::Edge q; q.head = hg.nodes_by_id[1]; q.tails.push_back(hg.nodes_by_id[0]); q.score = 0.367879441171;
+ hg.nodes_by_id[1]->incoming.push_back(&q);
+ hg.nodes_by_id[0]->outgoing.push_back(&q);
+ q.arity = 1;
q.mark = 0;
+ hg.edges.push_back(&q);
- Hg::Hyperedge p; p.head = nodes_by_id[2]; p.tails.push_back(nodes_by_id[0]); p.score = 0.606530659713;
- nodes_by_id[2]->incoming.push_back(&p);
- nodes_by_id[0]->outgoing.push_back(&p);
- p.arity_ = 1;
+ Hg::Edge p; p.head = hg.nodes_by_id[2]; p.tails.push_back(hg.nodes_by_id[0]); p.score = 0.606530659713;
+ hg.nodes_by_id[2]->incoming.push_back(&p);
+ hg.nodes_by_id[0]->outgoing.push_back(&p);
+ p.arity = 1;
p.mark = 0;
+ hg.edges.push_back(&p);
- Hg::Hyperedge r; r.head = nodes_by_id[3]; r.tails.push_back(nodes_by_id[0]); r.score = 1.0;
- nodes_by_id[3]->incoming.push_back(&r);
- nodes_by_id[0]->outgoing.push_back(&r);
- r.arity_ = 1;
+ Hg::Edge r; r.head = hg.nodes_by_id[3]; r.tails.push_back(hg.nodes_by_id[0]); r.score = 1.0;
+ hg.nodes_by_id[3]->incoming.push_back(&r);
+ hg.nodes_by_id[0]->outgoing.push_back(&r);
+ r.arity = 1;
r.mark = 0;
- Hg::Hyperedge s; s.head = nodes_by_id[3]; s.tails.push_back(nodes_by_id[0]); s.score = 1.0;
- nodes_by_id[3]->incoming.push_back(&s);
- nodes_by_id[0]->outgoing.push_back(&s);
- s.arity_ = 1;
+ hg.edges.push_back(&r);
+
+ Hg::Edge s; s.head = hg.nodes_by_id[3]; s.tails.push_back(hg.nodes_by_id[0]); s.score = 1.0;
+ hg.nodes_by_id[3]->incoming.push_back(&s);
+ hg.nodes_by_id[0]->outgoing.push_back(&s);
+ s.arity = 1;
s.mark = 0;
+ hg.edges.push_back(&s);
- Hg::Hyperedge t; t.head = nodes_by_id[4]; t.tails.push_back(nodes_by_id[0]); t.score = 1.0;
- nodes_by_id[4]->incoming.push_back(&t);
- nodes_by_id[0]->outgoing.push_back(&t);
- t.arity_ = 1;
+ Hg::Edge t; t.head = hg.nodes_by_id[4]; t.tails.push_back(hg.nodes_by_id[0]); t.score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(&t);
+ hg.nodes_by_id[0]->outgoing.push_back(&t);
+ t.arity = 1;
t.mark = 0;
- Hg::Hyperedge u; u.head = nodes_by_id[4]; u.tails.push_back(nodes_by_id[0]); u.score = 1.0;
- nodes_by_id[4]->incoming.push_back(&u);
- nodes_by_id[0]->outgoing.push_back(&u);
- u.arity_ = 1;
+ hg.edges.push_back(&t);
+
+ Hg::Edge u; u.head = hg.nodes_by_id[4]; u.tails.push_back(hg.nodes_by_id[0]); u.score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(&u);
+ hg.nodes_by_id[0]->outgoing.push_back(&u);
+ u.arity = 1;
u.mark = 0;
+ hg.edges.push_back(&u);
- Hg::Hyperedge v; v.head = nodes_by_id[4]; v.tails.push_back(nodes_by_id[3]); v.score = 1.0;
- nodes_by_id[4]->incoming.push_back(&v);
- nodes_by_id[3]->outgoing.push_back(&v);
- v.arity_ = 1;
+ Hg::Edge v; v.head = hg.nodes_by_id[4]; v.tails.push_back(hg.nodes_by_id[3]); v.score = 1.0;
+ hg.nodes_by_id[4]->incoming.push_back(&v);
+ hg.nodes_by_id[3]->outgoing.push_back(&v);
+ v.arity = 1;
v.mark = 0;
- Hg::Hyperedge w; w.head = nodes_by_id[4]; w.tails.push_back(nodes_by_id[3]); w.score = 2.71828182846;
- nodes_by_id[4]->incoming.push_back(&w);
- nodes_by_id[3]->outgoing.push_back(&w);
- w.arity_ = 1;
+ hg.edges.push_back(&v);
+
+ Hg::Edge w; w.head = hg.nodes_by_id[4]; w.tails.push_back(hg.nodes_by_id[3]); w.score = 2.71828182846;
+ hg.nodes_by_id[4]->incoming.push_back(&w);
+ hg.nodes_by_id[3]->outgoing.push_back(&w);
+ w.arity = 1;
w.mark = 0;
+ hg.edges.push_back(&w);
- Hg::Hyperedge x; x.head = nodes_by_id[5]; x.tails.push_back(nodes_by_id[4]); x.score = 1.0;
- nodes_by_id[5]->incoming.push_back(&x);
- nodes_by_id[4]->outgoing.push_back(&x);
- x.arity_ = 1;
+ Hg::Edge x; x.head = hg.nodes_by_id[5]; x.tails.push_back(hg.nodes_by_id[4]); x.score = 1.0;
+ hg.nodes_by_id[5]->incoming.push_back(&x);
+ hg.nodes_by_id[4]->outgoing.push_back(&x);
+ x.arity = 1;
x.mark = 0;
+ hg.edges.push_back(&x);
- Hg::Hyperedge y; y.head = nodes_by_id[6]; y.tails.push_back(nodes_by_id[2]); y.tails.push_back(nodes_by_id[5]); y.score = 1.0;
- nodes_by_id[6]->incoming.push_back(&y);
- nodes_by_id[2]->outgoing.push_back(&y);
- nodes_by_id[5]->outgoing.push_back(&y);
- y.arity_ = 2;
+ Hg::Edge y; y.head = hg.nodes_by_id[6]; y.tails.push_back(hg.nodes_by_id[2]); y.tails.push_back(hg.nodes_by_id[5]); y.score = 1.0;
+ hg.nodes_by_id[6]->incoming.push_back(&y);
+ hg.nodes_by_id[2]->outgoing.push_back(&y);
+ hg.nodes_by_id[5]->outgoing.push_back(&y);
+ y.arity = 2;
y.mark = 0;
+ hg.edges.push_back(&y);
- Hg::Hyperedge z; z.head = nodes_by_id[7]; z.tails.push_back(nodes_by_id[1]); z.tails.push_back(nodes_by_id[6]); z.score = 1.0;
- nodes_by_id[7]->incoming.push_back(&z);
- nodes_by_id[1]->outgoing.push_back(&z);
- nodes_by_id[6]->outgoing.push_back(&z);
- z.arity_ = 2;
+ Hg::Edge z; z.head = hg.nodes_by_id[7]; z.tails.push_back(hg.nodes_by_id[1]); z.tails.push_back(hg.nodes_by_id[6]); z.score = 1.0;
+ hg.nodes_by_id[7]->incoming.push_back(&z);
+ hg.nodes_by_id[1]->outgoing.push_back(&z);
+ hg.nodes_by_id[6]->outgoing.push_back(&z);
+ z.arity = 2;
z.mark = 0;
+ hg.edges.push_back(&z);
- // test
- Hg::viterbi(nodes, nodes_by_id, nodes_by_id[0]);
+ Hg::topological_sort(hg.nodes, hg.nodes.begin());
+ //Hg::viterbi(nodes, hg.nodes, hg.nodes_by_id(0]);
}