summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fast/Makefile12
-rw-r--r--fast/README.md8
-rwxr-xr-xfast/fast_weaverbin0 -> 88433 bytes
-rw-r--r--fast/grammar.cc16
-rw-r--r--fast/grammar.hh33
-rw-r--r--fast/grammar.obin0 -> 2872 bytes
-rw-r--r--fast/hypergraph.cc161
-rw-r--r--fast/hypergraph.hh76
-rw-r--r--fast/hypergraph.obin0 -> 72616 bytes
-rw-r--r--fast/main.cc138
-rw-r--r--fast/semiring.hh36
11 files changed, 480 insertions, 0 deletions
diff --git a/fast/Makefile b/fast/Makefile
new file mode 100644
index 0000000..4aea1ac
--- /dev/null
+++ b/fast/Makefile
@@ -0,0 +1,12 @@
+all: hypergraph.o main.cc
+ clang -std=c++11 -lstdc++ main.cc hypergraph.o -o fast_weaver
+
+hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh
+ clang -std=c++11 -lstdc++ -c hypergraph.cc grammar.o
+
+grammar.o: grammar.cc grammar.hh
+ clang -std=c++11 -lstdc++ -c grammar.cc
+
+clean:
+ rm fast_weaver hypergraph.o grammar.o
+
diff --git a/fast/README.md b/fast/README.md
new file mode 100644
index 0000000..112a7ae
--- /dev/null
+++ b/fast/README.md
@@ -0,0 +1,8 @@
+TODO
+ * grammar
+ * parser
+ * other semirings
+ * sparse vector
+ * hg: json input (jsoncpp?)
+ * language model: kenlm
+
diff --git a/fast/fast_weaver b/fast/fast_weaver
new file mode 100755
index 0000000..7d349b3
--- /dev/null
+++ b/fast/fast_weaver
Binary files differ
diff --git a/fast/grammar.cc b/fast/grammar.cc
new file mode 100644
index 0000000..946b062
--- /dev/null
+++ b/fast/grammar.cc
@@ -0,0 +1,16 @@
+#include "grammar.hh"
+
+namespace Grammar {
+
+
+string
+NT::s()
+{
+ ostringstream os;
+ os << "NT<" << this->symbol << "," << this->index << ">";
+ return os.str();
+}
+
+
+} // namespace
+
diff --git a/fast/grammar.hh b/fast/grammar.hh
new file mode 100644
index 0000000..5625b85
--- /dev/null
+++ b/fast/grammar.hh
@@ -0,0 +1,33 @@
+#ifndef GRAMMAR_HH
+#define GRAMMAR_HH
+
+#include <string>
+#include <sstream>
+
+using namespace std;
+
+namespace Grammar {
+
+
+class NT {
+ public:
+ string symbol;
+ unsigned int index;
+
+ string s();
+};
+
+class T {
+ public:
+ string word;
+};
+
+class Rule {
+ public:
+};
+
+
+} // namespace
+
+#endif
+
diff --git a/fast/grammar.o b/fast/grammar.o
new file mode 100644
index 0000000..aae25db
--- /dev/null
+++ b/fast/grammar.o
Binary files differ
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
new file mode 100644
index 0000000..08b4d05
--- /dev/null
+++ b/fast/hypergraph.cc
@@ -0,0 +1,161 @@
+#include "hypergraph.hh"
+
+namespace Hg {
+
+
+/*
+ * Node
+ * methods
+ *
+ */
+string
+Node::s()
+{
+ ostringstream os;
+ os << \
+ "Node<id=" << this->id << \
+ ", symbol=" << this->symbol << \
+ ", span=(" << this->left << "," << this->right << ")" \
+ ", outgoing:" << this->outgoing.size() << \
+ ", incoming:" << this->incoming.size() << ">";
+ return os.str();
+}
+
+/*
+ * Hyperedge
+ * methods
+ *
+ */
+unsigned int
+Hyperedge::arity()
+{
+ return this->arity_;
+}
+
+bool
+Hyperedge::is_marked()
+{
+ return (this->arity_ == this->mark);
+}
+
+string
+Hyperedge::s()
+{
+ ostringstream os;
+ vector<unsigned int> tails_ids;
+ ostringstream _;
+ for_each(this->tails.begin(), this->tails.end(), [&_](Node* n) { _ << n->id; _ << ","; });
+ os << \
+ "Hyperedge<head=" << this->head->id << \
+ ", rule:'" << "TODO" << \
+ "', tails=" << _.str() << \
+ ", arity=" << this->arity() << \
+ ", score=" << this->score << \
+ " , f=" << "TODO" << \
+ ", mark=" << this->mark << ">";
+ return os.str();
+}
+
+
+/*
+ * Hypergraph
+ * methods
+ *
+ */
+unsigned int
+Hypergraph::arity()
+{
+ // TODO
+ return 0;
+}
+
+void
+Hypergraph::reset()
+{
+}
+
+string
+Hypergraph::s()
+{
+ // TODO
+ return "";
+}
+
+string
+Hypergraph::json_s()
+{
+ // TODO
+ return "";
+}
+
+/*
+ * functions
+ *
+ */
+vector<Node*>
+topological_sort(vector<Node*>& nodes)
+{
+ vector<Node*> sorted;
+ vector<Node*> tmp;
+ for_each(nodes.begin(), nodes.end(), [&tmp](Node* n) -> void { if (n->incoming.size()==0) tmp.push_back(n); });
+ while (!tmp.empty()) {
+ Node* n = tmp.front();
+ cout << n->id << endl;
+ tmp.erase(tmp.begin());
+ sorted.push_back(n);
+ for (auto it = n->outgoing.begin(); it != n->outgoing.end(); it++) { // edges
+ if ((*it)->is_marked()) {
+ continue;
+ }
+ (*it)->mark++;
+ bool add = true;
+ for (auto jt = (*it)->head->incoming.begin(); jt != (*it)->head->incoming.end(); jt++) {
+ if (!(*jt)->is_marked()) {
+ add = false;
+ break;
+ }
+ }
+ if (add==true) {
+ tmp.push_back( (*it)->head );
+ cout << " " << (*it)->head->id<< endl;
+ }
+ }
+ }
+ return sorted;
+}
+
+void
+init(vector<Node*>& nodes, ViterbiSemiring<double>& semiring, Node* root)
+{
+ for (auto it = nodes.begin(); it != nodes.end(); ++it)
+ (*it)->score = semiring.null;
+ root->score = semiring.one;
+}
+
+void
+viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* root)
+{
+ vector<Node*> sorted = topological_sort(nodes);
+ ViterbiSemiring<double> semiring;
+
+ init(sorted, semiring, root);
+
+ for (auto n_it = sorted.begin(); n_it != sorted.end(); ++n_it) {
+ for (auto e_it = (*n_it)->incoming.begin(); e_it != (*n_it)->incoming.end(); ++e_it) {
+ cout << (*e_it)->s() << endl;
+ double s = semiring.one;
+ for (auto m_it = (*e_it)->tails.begin(); m_it != (*e_it)->tails.end(); m_it++) {
+ s = semiring.multiply(s, (*m_it)->score);
+ }
+ (*n_it)->score = semiring.add((*n_it)->score, semiring.multiply(s, (*e_it)->score));
+ }
+ }
+
+ for (auto it = sorted.begin(); it != sorted.end(); ++it) {
+ cout << (*it)->id << " " << (*it)->score << endl;
+ }
+}
+
+
+} // namespace
+
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
new file mode 100644
index 0000000..6e53045
--- /dev/null
+++ b/fast/hypergraph.hh
@@ -0,0 +1,76 @@
+#ifndef HYPERGRAPH_HH
+#define HYPERGRAPH_HH
+
+#include "grammar.hh"
+#include "semiring.hh"
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <vector>
+#include <map>
+#include <functional>
+#include <algorithm>
+
+using namespace std;
+
+typedef double score_t;
+typedef double weight_t;
+
+namespace Hg {
+
+
+class Node;
+
+class Hyperedge {
+ public:
+ Node* head;
+ vector<Node*> tails;
+ score_t score;
+ vector<weight_t> f;
+ unsigned int mark;
+ //Grammar::Rule rule; FIXME
+ unsigned int arity_;
+
+ unsigned int arity();
+ bool is_marked();
+ string s();
+
+};
+
+
+class Node {
+ public:
+ unsigned int id;
+ string symbol;
+ unsigned int left;
+ unsigned int right;
+ score_t score;
+ vector<Hyperedge*> outgoing;
+ vector<Hyperedge*> incoming;
+
+ string s();
+};
+
+
+
+class Hypergraph {
+ public:
+ vector<Node*> nodes;
+ vector<Hyperedge*> edges;
+ unsigned int arity_;
+ map<unsigned int, Node*> nodes_by_id;
+
+ unsigned int arity();
+ void reset();
+ string s();
+ string json_s();
+};
+
+vector<Node*> topological_sort(vector<Node*>& nodes);
+void viterbi(vector<Node*>& nodes, map<unsigned int, Hg::Node*> nodes_by_id, Node* root);
+
+
+} // namespace
+
+#endif
+
diff --git a/fast/hypergraph.o b/fast/hypergraph.o
new file mode 100644
index 0000000..8fab348
--- /dev/null
+++ b/fast/hypergraph.o
Binary files differ
diff --git a/fast/main.cc b/fast/main.cc
new file mode 100644
index 0000000..c951cd8
--- /dev/null
+++ b/fast/main.cc
@@ -0,0 +1,138 @@
+#include "hypergraph.hh"
+
+
+int
+main(void)
+{
+/*
+{
+"weights":{"logp":2.0,"use_house":0.0,"use_shell":1.0},
+"nodes":
+[
+{ "id":0, "cat":"root", "span":[0,0] },
+{ "id":1, "cat":"NP", "span":[1,2] },
+{ "id":2, "cat":"V", "span":[2,3] },
+{ "id":3, "cat":"JJ", "span":[4,5] },
+{ "id":4, "cat":"NN", "span":[4,6] },
+{ "id":5, "cat":"NP", "span":[3,6] },
+{ "id":6, "cat":"VP", "span":[2,6] },
+{ "id":7, "cat":"S", "span":[1,6] }
+],
+"edges":
+[
+{ "head":1, "rule":"[NP] ||| ich ||| i", "tails":[0], "f":{"logp":-0.5,"use_i":1.0} },
+{ "head":2, "rule":"[V] ||| sah ||| saw", "tails":[0], "f":{"logp":-0.25,"use_saw":1.0} },
+{ "head":3, "rule":"[JJ] ||| kleines ||| small", "tails":[0], "f":{"logp":0.0,"use_small":1.0} },
+{ "head":3, "rule":"[JJ] ||| kleines ||| little", "tails":[0], "f":{"logp":0.0,"use_little":1.0} },
+{ "head":4, "rule":"[NN] ||| kleines haus ||| small house", "tails":[0], "f":{"logp":0.0,"use_house":1.0} },
+{ "head":4, "rule":"[NN] ||| kleines haus ||| little house", "tails":[0], "f":{"logp":0.0,"use_house":1.0} },
+{ "head":4, "rule":"[NN] ||| [JJ,1] haus ||| [JJ,1] shell", "tails":[3], "f":{"logp":0.0,"use_shell":1.0} },
+{ "head":4, "rule":"[NN] ||| [JJ,1] haus ||| [JJ,1] house", "tails":[3], "f":{"logp":0.0,"use_house":1.0} },
+{ "head":5, "rule":"[NP] ||| ein [NN,1] ||| a [NN,1]", "tails":[4], "f":{"logp":0.0,"use_a":1.0} },
+{ "head":6, "rule":"[VP] ||| [V,1] [NP,2] ||| [V,1] [NP,2]", "tails":[2, 5], "f":{"logp":0.0} },
+{ "head":7, "rule":"[S] ||| [NP,1] [VP,2] ||| [NP,1] [VP,2]", "tails":[1, 6], "f":{"logp":0.0} }
+]
+}
+*/
+
+ // nodes
+ Hg::Node a; a.id = 0; a.symbol = "root"; a.left = false; a.right = false;
+ Hg::Node b; b.id = 1; b.symbol = "NP"; b.left = 0; b.right = 1;
+ Hg::Node c; c.id = 2; c.symbol = "V"; c.left = 1; c.right = 2;
+ Hg::Node d; d.id = 3; d.symbol = "JJ"; d.left = 3; d.right = 4;
+ Hg::Node e; e.id = 4; e.symbol = "NN"; e.left = 3; e.right = 5;
+ Hg::Node f; f.id = 5; f.symbol = "NP"; f.left = 2; f.right = 5;
+ Hg::Node g; g.id = 6; g.symbol = "NP"; g.left = 1; g.right = 5;
+ Hg::Node h; h.id = 7; h.symbol = "S"; g.left = 0; g.right = 5;
+
+ vector<Hg::Node*> nodes;
+ nodes.push_back(&a);
+ nodes.push_back(&b);
+ nodes.push_back(&c);
+ nodes.push_back(&d);
+ nodes.push_back(&e);
+ nodes.push_back(&f);
+ nodes.push_back(&g);
+ nodes.push_back(&h);
+
+ // nodes_by_id
+ map<unsigned int, Hg::Node*> nodes_by_id;
+ nodes_by_id[0] = &a;
+ nodes_by_id[1] = &b;
+ nodes_by_id[2] = &c;
+ nodes_by_id[3] = &d;
+ nodes_by_id[4] = &e;
+ nodes_by_id[5] = &f;
+ nodes_by_id[6] = &g;
+ nodes_by_id[7] = &h;
+
+ // edges
+ Hg::Hyperedge q; q.head = nodes_by_id[1]; q.tails.push_back(nodes_by_id[0]); q.score = 0.367879441171;
+ nodes_by_id[1]->incoming.push_back(&q);
+ nodes_by_id[0]->outgoing.push_back(&q);
+ q.arity_ = 1;
+ q.mark = 0;
+
+ Hg::Hyperedge p; p.head = nodes_by_id[2]; p.tails.push_back(nodes_by_id[0]); p.score = 0.606530659713;
+ nodes_by_id[2]->incoming.push_back(&p);
+ nodes_by_id[0]->outgoing.push_back(&p);
+ p.arity_ = 1;
+ p.mark = 0;
+
+ Hg::Hyperedge r; r.head = nodes_by_id[3]; r.tails.push_back(nodes_by_id[0]); r.score = 1.0;
+ nodes_by_id[3]->incoming.push_back(&r);
+ nodes_by_id[0]->outgoing.push_back(&r);
+ r.arity_ = 1;
+ r.mark = 0;
+ Hg::Hyperedge s; s.head = nodes_by_id[3]; s.tails.push_back(nodes_by_id[0]); s.score = 1.0;
+ nodes_by_id[3]->incoming.push_back(&s);
+ nodes_by_id[0]->outgoing.push_back(&s);
+ s.arity_ = 1;
+ s.mark = 0;
+
+ Hg::Hyperedge t; t.head = nodes_by_id[4]; t.tails.push_back(nodes_by_id[0]); t.score = 1.0;
+ nodes_by_id[4]->incoming.push_back(&t);
+ nodes_by_id[0]->outgoing.push_back(&t);
+ t.arity_ = 1;
+ t.mark = 0;
+ Hg::Hyperedge u; u.head = nodes_by_id[4]; u.tails.push_back(nodes_by_id[0]); u.score = 1.0;
+ nodes_by_id[4]->incoming.push_back(&u);
+ nodes_by_id[0]->outgoing.push_back(&u);
+ u.arity_ = 1;
+ u.mark = 0;
+
+ Hg::Hyperedge v; v.head = nodes_by_id[4]; v.tails.push_back(nodes_by_id[3]); v.score = 1.0;
+ nodes_by_id[4]->incoming.push_back(&v);
+ nodes_by_id[3]->outgoing.push_back(&v);
+ v.arity_ = 1;
+ v.mark = 0;
+ Hg::Hyperedge w; w.head = nodes_by_id[4]; w.tails.push_back(nodes_by_id[3]); w.score = 2.71828182846;
+ nodes_by_id[4]->incoming.push_back(&w);
+ nodes_by_id[3]->outgoing.push_back(&w);
+ w.arity_ = 1;
+ w.mark = 0;
+
+ Hg::Hyperedge x; x.head = nodes_by_id[5]; x.tails.push_back(nodes_by_id[4]); x.score = 1.0;
+ nodes_by_id[5]->incoming.push_back(&x);
+ nodes_by_id[4]->outgoing.push_back(&x);
+ x.arity_ = 1;
+ x.mark = 0;
+
+ Hg::Hyperedge y; y.head = nodes_by_id[6]; y.tails.push_back(nodes_by_id[2]); y.tails.push_back(nodes_by_id[5]); y.score = 1.0;
+ nodes_by_id[6]->incoming.push_back(&y);
+ nodes_by_id[2]->outgoing.push_back(&y);
+ nodes_by_id[5]->outgoing.push_back(&y);
+ y.arity_ = 2;
+ y.mark = 0;
+
+ Hg::Hyperedge z; z.head = nodes_by_id[7]; z.tails.push_back(nodes_by_id[1]); z.tails.push_back(nodes_by_id[6]); z.score = 1.0;
+ nodes_by_id[7]->incoming.push_back(&z);
+ nodes_by_id[1]->outgoing.push_back(&z);
+ nodes_by_id[6]->outgoing.push_back(&z);
+ z.arity_ = 2;
+ z.mark = 0;
+
+ // test
+ Hg::viterbi(nodes, nodes_by_id, nodes_by_id[0]);
+}
+
diff --git a/fast/semiring.hh b/fast/semiring.hh
new file mode 100644
index 0000000..1e40f48
--- /dev/null
+++ b/fast/semiring.hh
@@ -0,0 +1,36 @@
+#ifndef SEMIRING_HH
+#define SEMIRING_HH
+
+
+template<typename T>
+class ViterbiSemiring {
+ public:
+ T one = 1.0;
+ T null = 0.0;
+
+ T add(T x, T y);
+ T multiply(T x, T y);
+ T convert(T x);
+};
+
+template<typename T> T
+ViterbiSemiring<T>::add(T x, T y)
+{
+ return max(x, y);
+}
+
+template<typename T> T
+ViterbiSemiring<T>::multiply(T x, T y)
+{
+ return x * y;
+}
+
+template<typename T> T
+ViterbiSemiring<T>::convert(T x)
+{
+ return (T)x;
+}
+
+
+#endif
+