summaryrefslogtreecommitdiff
path: root/fast
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-09-16 10:23:14 +0100
committerPatrick Simianer <p@simianer.de>2014-09-16 10:23:14 +0100
commit129a22cfcc7651daa4b11ed52e7870249f6373a5 (patch)
tree78de4649396ab0d37a325b7598f9873c2d65f4c9 /fast
parentdf70006a07fb67b17fb39aa56762c50c2e7b8131 (diff)
spring cleaning
Diffstat (limited to 'fast')
-rw-r--r--fast/Makefile15
-rw-r--r--fast/README.md38
-rw-r--r--fast/grammar.hh316
-rw-r--r--fast/hypergraph.cc363
-rw-r--r--fast/hypergraph.hh103
-rw-r--r--fast/main.cc27
-rw-r--r--fast/parse.hh108
-rw-r--r--fast/semiring.hh35
-rw-r--r--fast/sparse_vector.hh186
-rw-r--r--fast/test/Makefile19
-rwxr-xr-xfast/test/test_grammarbin60943 -> 0 bytes
-rw-r--r--fast/test/test_grammar.cc20
-rwxr-xr-xfast/test/test_sparse_vectorbin44288 -> 0 bytes
-rw-r--r--fast/test/test_sparse_vector.cc37
-rw-r--r--fast/util.hh47
-rw-r--r--fast/weaver.hh10
16 files changed, 0 insertions, 1324 deletions
diff --git a/fast/Makefile b/fast/Makefile
deleted file mode 100644
index 1a7f5b9..0000000
--- a/fast/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-COMPILER=g++
-CFLAGS=-std=c++11 -O3
-TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread
-
-
-all: hypergraph.o main.cc
- $(COMPILER) $(CFLAGS) -lstdc++ -lm -lmsgpack $(TCMALLOC) hypergraph.o main.cc -o fast_weaver
-
-hypergraph.o: hypergraph.cc hypergraph.hh semiring.hh sparse_vector.hh weaver.hh
- $(COMPILER) $(CFLAGS) -g -c $(TCMALLOC) hypergraph.cc
-
-clean:
- rm -f fast_weaver
- rm -f hypergraph.o parse.o
-
diff --git a/fast/README.md b/fast/README.md
deleted file mode 100644
index f92245b..0000000
--- a/fast/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-TODO
- * sparse vector (unordered_map) -> where to store?
- * parser
- * Rule -> ChartItem -> Node ?
- * k-best
- * other semirings
- * include language model
- * compress/hash words/feature strings?
- * cast? Rule -> Edge, ChartItem -> Node
- * feature factory, observer
-
-Dependencies:
- * MessagePack for object serialization [1]
- * kenlm language model [2]
-
-This is Linux only.
-
-
-[1] http://msgpack.org
-[2] http://kheafield.com/code/kenlm/
-
-
-stuff to have a look at:
-http://math.nist.gov/spblas/
-http://lapackpp.sourceforge.net/
-http://www.cvmlib.com/
-http://sourceforge.net/projects/lpp/
-http://math-atlas.sourceforge.net/
-http://www.netlib.org/lapack/
-http://bytes.com/topic/c/answers/702569-blas-vs-cblas-c
-http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
-http://www.osl.iu.edu/research/mtl/download.php3
-http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library
-https://software.intel.com/en-us/tbb_4.2_doc
-http://goog-perftools.sourceforge.net/doc/tcmalloc.html
-http://www.sgi.com/tech/stl/Rope.html
-http://www.cs.unc.edu/Research/compgeom/gzstream/
-
diff --git a/fast/grammar.hh b/fast/grammar.hh
deleted file mode 100644
index 4906c46..0000000
--- a/fast/grammar.hh
+++ /dev/null
@@ -1,316 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include <msgpack.hpp>
-
-#include "sparse_vector.hh"
-#include "util.hh"
-
-using namespace std;
-
-
-namespace G {
-
-enum item_type {
- UNDEFINED,
- TERMINAL,
- NON_TERMINAL
-};
-
-struct Item {
- virtual size_t index() const { return 0; }
- virtual symbol_t symbol() const { return ""; }
- virtual item_type type() const { return UNDEFINED; }
-
- virtual ostream& repr(ostream& os) const { return os << "Item<>"; }
- virtual ostream& escaped(ostream& os) const { return os << ""; }
-
- friend ostream&
- operator<<(ostream& os, const Item& i)
- {
- return i.repr(os);
- };
-};
-
-struct NT : public Item {
- symbol_t symbol_;
- size_t index_;
-
- NT() {}
-
- NT(string const& s)
- {
- index_ = 0; // default
- string t(s);
- t.erase(0, 1); t.pop_back(); // remove '[' and ']'
- istringstream ss(t);
- if (ss >> index_) { // [i]
- symbol_ = "";
- index_ = stoi(s);
- return;
- } else {
- ss.clear();
- string buf;
- size_t j = 0;
- while (ss.good() && getline(ss, buf, ',')) {
- if (j == 0) {
- symbol_ = buf;
- } else {
- index_ = stoi(buf);
- }
- j++;
- }
- }
- }
-
- virtual size_t index() const { return index_; }
- virtual symbol_t symbol() const { return symbol_; }
- virtual item_type type() const { return NON_TERMINAL; }
-
- virtual ostream&
- repr(ostream& os) const
- {
- return os << "NT<" << symbol_ << "," << index_ << ">";
- }
-
- virtual ostream&
- escaped(ostream& os) const
- {
- os << "[" << symbol_;
- if (index_ > 0)
- os << "," << index_;
- os << "]";
-
- return os;
- }
-};
-
-struct T : public Item {
- symbol_t symbol_;
-
- T(string const& s)
- {
- symbol_ = s;
- }
-
- virtual symbol_t symbol() const { return symbol_; }
- virtual item_type type() const { return TERMINAL; }
-
- virtual ostream&
- repr(ostream& os) const
- {
- return os << "T<" << symbol_ << ">";
- }
-
- virtual ostream&
- escaped(ostream& os) const
- {
- os << util::json_escape(symbol_);
- }
-};
-
-struct Vocabulary
-{
- unordered_map<symbol_t, size_t> m_;
- vector<Item*> items_;
-
- bool
- is_non_terminal(string const& s)
- {
- return s.front() == '[' && s.back() == ']';
- }
-
- Item*
- get(symbol_t const& s)
- {
- if (is_non_terminal(s))
- return new NT(s);
- if (m_.find(s) != m_.end())
- return items_[m_[s]];
- return add(s);
- }
-
- Item*
- add(symbol_t const& s)
- {
- size_t next_index_ = items_.size();
- T* item = new T(s);
- items_.push_back(item);
- m_[s] = next_index_;
-
- return item;
- }
-};
-
-struct Rule {
- NT* lhs;
- vector<Item*> rhs;
- vector<Item*> target;
- size_t arity;
-Sv::SparseVector<string, score_t>* f;
- map<size_t, size_t> order;
- string as_str_;
-
- Rule() {}
-
- Rule(string const& s, Vocabulary& vocab) { from_s(this, s, vocab); }
-
- static void
- from_s(Rule* r, string const& s, Vocabulary& vocab)
- {
- istringstream ss(s);
- string buf;
- size_t j = 0, i = 1;
- r->arity = 0;
- vector<NT*> rhs_non_terminals;
- r->f = new Sv::SparseVector<string, score_t>();
- while (ss >> buf) {
- if (buf == "|||") { j++; continue; }
- if (j == 0) { // left-hand side
- r->lhs = new NT(buf);
- } else if (j == 1) { // right-hand side
- Item* item = vocab.get(buf);
- r->rhs.push_back(item);
- if (item->type() == NON_TERMINAL) {
- r->arity++;
- rhs_non_terminals.push_back(reinterpret_cast<NT*>(item));
- }
- } else if (j == 2) { // target
- Item* item = vocab.get(buf);
- if (item->type() == NON_TERMINAL) {
- r->order.insert(make_pair(i, item->index()));
- i++;
- if (item->symbol() == "") { // only [1], [2] ... on target
- reinterpret_cast<NT*>(item)->symbol_ = \
- rhs_non_terminals[item->index()-1]->symbol();
- }
- }
- r->target.push_back(item);
- } else if (j == 3) { // feature vector
- Sv::SparseVector<string, score_t>::from_s(r->f, buf);
- // FIXME: this is slow!!! ^^^
- } else if (j == 4) { // alignment
- } else {
- // error
- }
- if (j == 4) break;
- }
- }
-
- ostream&
- repr(ostream& os) const
- {
- os << "Rule<lhs=";
- lhs->repr(os);
- os << ", rhs:{";
- for (auto it = rhs.begin(); it != rhs.end(); it++) {
- (**it).repr(os);
- if (next(it) != rhs.end()) os << " ";
- }
- os << "}, target:{";
- for (auto it = target.begin(); it != target.end(); it++) {
- (**it).repr(os);
- if (next(it) != target.end()) os << " ";
- }
- os << "}, f:";
- f->repr(os);
- os << ", arity=" << arity << \
- ", order:{";
- for (auto it = order.begin(); it != order.end(); it++) {
- os << it->first << "->" << it->second;
- if (next(it) != order.end()) os << ", ";
- }
- os << "}>";
-
- return os;
- }
-
- ostream&
- escaped(ostream& os) const
- {
- lhs->escaped(os);
- os << " ||| ";
- for (auto it = rhs.begin(); it != rhs.end(); it++) {
- (**it).escaped(os);
- if (next(it) != rhs.end()) os << " ";
- }
- os << " ||| ";
- for (auto it = target.begin(); it != target.end(); it++) {
- (**it).escaped(os);
- if (next(it) != target.end()) os << " ";
- }
- os << " ||| ";
- f->escaped(os);
- os << " ||| " << \
- "TODO";
-
- return os;
- };
-
- friend ostream&
- operator<<(ostream& os, Rule const& r)
- {
- return r.repr(os);
- };
-
- // --
- void
- prep_for_serialization_()
- {
- ostringstream os;
- escaped(os);
- as_str_ = os.str();
- };
- MSGPACK_DEFINE(as_str_);
- // ^^^ FIXME
-};
-
-struct Grammar {
- vector<Rule*> rules;
- vector<Rule*> flat;
- vector<Rule*> start_non_terminal;
- vector<Rule*> start_terminal;
-
- Grammar() {}
-
- Grammar(string const& fn, Vocabulary& vocab)
- {
- ifstream ifs(fn);
- string line;
- while (getline(ifs, line)) {
- G::Rule* r = new G::Rule(line, vocab);
- rules.push_back(r);
- if (r->arity == 0)
- flat.push_back(r);
- else if (r->rhs.front()->type() == NON_TERMINAL)
- start_non_terminal.push_back(r);
- else
- start_terminal.push_back(r);
- }
- }
-
- void add_glue();
- // ^^^ TODO
- void add_pass_through(const string& input);
- // ^^^ TODO
-
- friend ostream&
- operator<<(ostream& os, Grammar const& g)
- {
- for (const auto it: g.rules) {
- it->repr(os);
- os << endl;
- }
-
- return os;
- }
-};
-
-} // namespace G
-
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
deleted file mode 100644
index 2b33ff4..0000000
--- a/fast/hypergraph.cc
+++ /dev/null
@@ -1,363 +0,0 @@
-#include "hypergraph.hh"
-
-
-namespace Hg {
-
-template<typename Semiring> void
-init(const list<Node*>& nodes, const list<Node*>::iterator root, const Semiring& semiring)
-{
- for (const auto it: nodes)
- it->score = semiring.null;
- (**root).score = semiring.one;
-}
-
-void
-reset(const list<Node*> nodes, const vector<Edge*> edges)
-{
- for (const auto it: nodes)
- it->mark = 0;
- for (auto it: edges)
- it->mark = 0;
-}
-
-void
-topological_sort(list<Node*>& nodes, const list<Node*>::iterator root)
-{
- auto p = root;
- auto to = nodes.begin();
- while (to != nodes.end()) {
- if ((**p).is_marked()) {
- for (const auto e: (**p).outgoing) { // explore edges
- e->mark++;
- if (e->is_marked()) {
- e->head->mark++;
- }
- }
- }
- if ((**p).is_marked()) {
- nodes.splice(to, nodes, p);
- to++;
- p = to;
- } else {
- ++p;
- }
- }
-}
-
-void
-viterbi(Hypergraph& hg)
-{
- list<Node*>::iterator root = \
- find_if(hg.nodes.begin(), hg.nodes.end(), \
- [](Node* n) { return n->incoming.size() == 0; });
-
- Hg::topological_sort(hg.nodes, root);
- Semiring::Viterbi<score_t> semiring;
- Hg::init(hg.nodes, root, semiring);
-
- for (const auto n: hg.nodes) {
- for (const auto e: n->incoming) {
- score_t s = semiring.one;
- for (const auto m: e->tails) {
- s = semiring.multiply(s, m->score);
- }
- n->score = semiring.add(n->score, semiring.multiply(s, e->score));
- }
- }
-}
-
-void
-viterbi_path(Hypergraph& hg, Path& p)
-{
- list<Node*>::iterator root = \
- find_if(hg.nodes.begin(), hg.nodes.end(), \
- [](Node* n) { return n->incoming.size() == 0; });
- //list<Node*>::iterator root = hg.nodes.begin();
-
- Hg::topological_sort(hg.nodes, root);
- // ^^^ FIXME do I need to do this when reading from file?
- Semiring::Viterbi<score_t> semiring;
- Hg::init(hg.nodes, root, semiring);
-
- for (auto n: hg.nodes) {
- Edge* best_edge;
- bool best = false;
- for (auto e: n->incoming) {
- score_t s = semiring.one;
- for (auto m: e->tails) {
- s = semiring.multiply(s, m->score);
- }
- if (n->score < semiring.multiply(s, e->score)) { // find max
- best_edge = e;
- best = true;
- }
- n->score = semiring.add(n->score, semiring.multiply(s, e->score));
- }
- if (best)
- p.push_back(best_edge);
- }
-}
-
-
-void
-derive(const Path& p, const Node* cur, vector<string>& carry)
-{
- Edge* next;
- for (auto it: p) {
- if (it->head->symbol == cur->symbol &&
- it->head->left == cur->left &&
- it->head->right == cur->right) {
- next = it;
- }
- } // FIXME this is probably not so good
-
- unsigned j = 0;
- for (auto it: next->rule->target) {
- if (it->type() == G::NON_TERMINAL) {
- derive(p, next->tails[next->rule->order[j]], carry);
- j++;
- } else {
- carry.push_back(it->symbol());
- }
- }
-}
-
-namespace io {
-
-void
-read(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab, const string& fn)
-{
- ifstream ifs(fn);
- size_t i = 0, r, n, e;
- msgpack::unpacker pac;
- while(true) {
- pac.reserve_buffer(32*1024);
- size_t bytes = ifs.readsome(pac.buffer(), pac.buffer_capacity());
- pac.buffer_consumed(bytes);
- msgpack::unpacked result;
- while(pac.next(&result)) {
- msgpack::object o = result.get();
- if (i == 0) {
- o.convert(&r);
- } else if (i == 1) {
- o.convert(&n);
- } else if (i == 2) {
- o.convert(&e);
- } else if (i > 2 && i <= r+2) {
- string s;
- o.convert(&s);
- G::Rule* rule = new G::Rule;
- G::Rule::from_s(rule, s, vocab);
- rules.push_back(rule);
- } else if (i > r+2 && i <= r+n+2) {
- Node* n = new Node;
- o.convert(n);
- hg.nodes.push_back(n);
- hg.nodes_by_id[n->id] = n;
- } else if (i > n+2 && i <= r+n+e+2) {
- Edge* e = new Edge;
- e->arity = 0;
- o.convert(e);
- e->head = hg.nodes_by_id[e->head_id_];
- hg.edges.push_back(e);
- hg.nodes_by_id[e->head_id_]->incoming.push_back(e);
- e->arity = 0;
- for (auto it = e->tails_ids_.begin(); it != e->tails_ids_.end(); it++) {
- hg.nodes_by_id[*it]->outgoing.push_back(e);
- e->tails.push_back(hg.nodes_by_id[*it]);
- e->arity++;
- }
- e->rule = rules[e->rule_id_];
- } else {
- // ERROR
- }
- i++;
- }
- if (!bytes) break;
- }
-}
-
-void
-write(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn) // FIXME
-{
- FILE* file = fopen(fn.c_str(), "wb");
- msgpack::fbuffer fbuf(file);
- msgpack::pack(fbuf, rules.size());
- msgpack::pack(fbuf, hg.nodes.size());
- msgpack::pack(fbuf, hg.edges.size());
- for (auto it = rules.cbegin(); it != rules.cend(); it++)
- msgpack::pack(fbuf, **it);
- for (auto it = hg.nodes.cbegin(); it != hg.nodes.cend(); it++)
- msgpack::pack(fbuf, **it);
- for (auto it = hg.edges.cbegin(); it != hg.edges.cend(); it++)
- msgpack::pack(fbuf, **it);
- fclose(file);
-}
-
-void
-manual(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab)
-{
- // nodes
- Node* a = new Node; a->id = 0; a->symbol = "root"; a->left = -1; a->right = -1; a->mark = 0;
- hg.nodes.push_back(a); hg.nodes_by_id[a->id] = a;
- Node* b = new Node; b->id = 1; b->symbol = "NP"; b->left = 0; b->right = 1; b->mark = 0;
- hg.nodes.push_back(b); hg.nodes_by_id[b->id] = b;
- Node* c = new Node; c->id = 2; c->symbol = "V"; c->left = 1; c->right = 2; c->mark = 0;
- hg.nodes.push_back(c); hg.nodes_by_id[c->id] = c;
- Node* d = new Node; d->id = 3; d->symbol = "JJ"; d->left = 3; d->right = 4; d->mark = 0;
- hg.nodes.push_back(d); hg.nodes_by_id[d->id] = d;
- Node* e = new Node; e->id = 4; e->symbol = "NN"; e->left = 3; e->right = 5; e->mark = 0;
- hg.nodes.push_back(e); hg.nodes_by_id[e->id] = e;
- Node* f = new Node; f->id = 5; f->symbol = "NP"; f->left = 2; f->right = 5; f->mark = 0;
- hg.nodes.push_back(f); hg.nodes_by_id[f->id] = f;
- Node* g = new Node; g->id = 6; g->symbol = "NP"; g->left = 1; g->right = 5; g->mark = 0;
- hg.nodes.push_back(g); hg.nodes_by_id[g->id] = g;
- Node* h = new Node; h->id = 7; h->symbol = "S"; h->left = 0; h->right = 6; h->mark = 0;
- hg.nodes.push_back(h); hg.nodes_by_id[h->id] = h;
-
- // rules
- vector<string> rule_strs;
- rule_strs.push_back("[NP] ||| ich ||| i ||| ||| ");
- rule_strs.push_back("[V] ||| sah ||| saw ||| ||| ");
- rule_strs.push_back("[JJ] ||| kleines ||| small ||| ||| ");
- rule_strs.push_back("[JJ] ||| kleines ||| little ||| ||| ");
- rule_strs.push_back("[NN] ||| kleines haus ||| small house ||| ||| ");
- rule_strs.push_back("[NN] ||| kleines haus ||| little house ||| ||| ");
- rule_strs.push_back("[NN] ||| [JJ,1] haus ||| [JJ,1] shell ||| ||| ");
- rule_strs.push_back("[NN] ||| [JJ,1] haus ||| [JJ,1] house ||| ||| ");
- rule_strs.push_back("[NP] ||| ein [NN,1] ||| a [NN,1] ||| ||| ");
- rule_strs.push_back("[VP] ||| [V,1] [NP,2] ||| [V,1] [NP,2] ||| ||| ");
- rule_strs.push_back("[S] ||| [NP,1] [VP,2] ||| [NP,1] [VP,2] ||| ||| ");
-
- for (auto it: rule_strs) {
- rules.push_back(new G::Rule(it, vocab));
- rules.back()->f = new Sv::SparseVector<string, score_t>();
- }
-
- // edges
- Edge* q = new Edge; q->head = hg.nodes_by_id[1]; q->tails.push_back(hg.nodes_by_id[0]); q->score = 0.367879441171;
- q->arity = 1; q->mark = 0;
- hg.edges.push_back(q);
- hg.nodes_by_id[1]->incoming.push_back(q);
- hg.nodes_by_id[0]->outgoing.push_back(q);
- q->rule = rules[0];
-
- Edge* p = new Edge; p->head = hg.nodes_by_id[2]; p->tails.push_back(hg.nodes_by_id[0]); p->score = 0.606530659713;
- p->arity = 1; p->mark = 0;
- hg.edges.push_back(p);
- hg.nodes_by_id[2]->incoming.push_back(p);
- hg.nodes_by_id[0]->outgoing.push_back(p);
- p->rule = rules[1];
-
- Edge* r = new Edge; r->head = hg.nodes_by_id[3]; r->tails.push_back(hg.nodes_by_id[0]); r->score = 1.0;
- r->arity = 1; r->mark = 0;
- hg.edges.push_back(r);
- hg.nodes_by_id[3]->incoming.push_back(r);
- hg.nodes_by_id[0]->outgoing.push_back(r);
- r->rule = rules[2];
-
- Edge* s = new Edge; s->head = hg.nodes_by_id[3]; s->tails.push_back(hg.nodes_by_id[0]); s->score = 1.0;
- s->arity = 1; s->mark = 0;
- hg.edges.push_back(s);
- hg.nodes_by_id[3]->incoming.push_back(s);
- hg.nodes_by_id[0]->outgoing.push_back(s);
- s->rule = rules[3];
-
- Edge* t = new Edge; t->head = hg.nodes_by_id[4]; t->tails.push_back(hg.nodes_by_id[0]); t->score = 1.0;
- t->arity = 1; t->mark = 0;
- hg.edges.push_back(t);
- hg.nodes_by_id[4]->incoming.push_back(t);
- hg.nodes_by_id[0]->outgoing.push_back(t);
- t->rule = rules[4];
-
- Edge* u = new Edge; u->head = hg.nodes_by_id[4]; u->tails.push_back(hg.nodes_by_id[0]); u->score = 1.0;
- u->arity = 1; u->mark = 0;
- hg.edges.push_back(u);
- hg.nodes_by_id[4]->incoming.push_back(u);
- hg.nodes_by_id[0]->outgoing.push_back(u);
- u->rule = rules[5];
-
- Edge* v = new Edge; v->head = hg.nodes_by_id[4]; v->tails.push_back(hg.nodes_by_id[3]); v->score = 1.0;
- v->arity = 1; v->mark = 0;
- hg.edges.push_back(v);
- hg.nodes_by_id[4]->incoming.push_back(v);
- hg.nodes_by_id[3]->outgoing.push_back(v);
- v->rule = rules[6];
-
- Edge* w = new Edge; w->head = hg.nodes_by_id[4]; w->tails.push_back(hg.nodes_by_id[3]); w->score = 2.71828182846;
- w->arity = 1; w->mark = 0;
- hg.edges.push_back(w);
- hg.nodes_by_id[4]->incoming.push_back(w);
- hg.nodes_by_id[3]->outgoing.push_back(w);
- w->rule = rules[7];
-
- Edge* x = new Edge; x->head = hg.nodes_by_id[5]; x->tails.push_back(hg.nodes_by_id[4]); x->score = 1.0;
- x->arity = 1; x->mark = 0;
- hg.edges.push_back(x);
- hg.nodes_by_id[5]->incoming.push_back(x);
- hg.nodes_by_id[4]->outgoing.push_back(x);
- x->rule = rules[8];
-
- Edge* y = new Edge; y->head = hg.nodes_by_id[6]; y->tails.push_back(hg.nodes_by_id[2]); y->tails.push_back(hg.nodes_by_id[5]); y->score = 1.0;
- y->arity = 2; y->mark = 0;
- hg.edges.push_back(y);
- hg.nodes_by_id[6]->incoming.push_back(y);
- hg.nodes_by_id[2]->outgoing.push_back(y);
- hg.nodes_by_id[5]->outgoing.push_back(y);
- y->rule = rules[9];
-
- Edge* z = new Edge; z->head = hg.nodes_by_id[7]; z->tails.push_back(hg.nodes_by_id[1]); z->tails.push_back(hg.nodes_by_id[6]); z->score = 1.0;
- z->arity = 2; z->mark = 0;
- hg.edges.push_back(z);
- hg.nodes_by_id[7]->incoming.push_back(z);
- hg.nodes_by_id[1]->outgoing.push_back(z);
- hg.nodes_by_id[6]->outgoing.push_back(z);
- z->rule = rules[10];
-}
-
-} // namespace Hg::io
-
-/*
- * Hg::Node
- *
- */
-ostream&
-operator<<(ostream& os, const Node& n)
-{
- os << \
- "Node<id=" << n.id << \
- ", symbol='" << n.symbol << "'" << \
- ", span=(" << n.left << "," << n.right << ")" \
- ", score=" << n.score << \
- ", incoming:" << n.incoming.size() << \
- ", outgoing:" << n.outgoing.size() << \
- ", mark=" << n.mark << ">";
- return os;
-}
-
-/*
- * Hg::Edge
- *
- */
-ostream&
-operator<<(ostream& os, const Edge& e)
-{
- ostringstream _;
- for (auto it: e.tails) {
- _ << it->id;
- if (it != e.tails.back()) _ << ",";
- }
- os << \
- "Edge<head=" << e.head->id << \
- ", tails=[" << _.str() << "]" \
- ", score=" << e.score << \
- ", rule:'";
- e.rule->escaped(os);
- os << "', f=" << "TODO" << \
- ", arity=" << e.arity << \
- ", mark=" << e.mark << ">";
- return os;
-}
-
-} // namespace Hg
-
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
deleted file mode 100644
index 1c48a88..0000000
--- a/fast/hypergraph.hh
+++ /dev/null
@@ -1,103 +0,0 @@
-#pragma once
-
-#include <algorithm>
-#include <fstream>
-#include <functional>
-#include <iostream>
-#include <iterator>
-#include <list>
-#include <msgpack.hpp>
-#include <msgpack/fbuffer.hpp>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "grammar.hh"
-#include "semiring.hh"
-#include "sparse_vector.hh"
-#include "weaver.hh"
-
-using namespace std;
-
-
-namespace Hg {
-
-struct Node;
-
-struct Edge {
- Node* head;
- vector<Node*> tails;
- score_t score;
- G::Rule* rule;
- unsigned int arity = 0;
- unsigned int mark = 0;
-
- inline bool is_marked() { return mark >= arity; }
- friend ostream& operator<<(ostream& os, const Edge& e);
-
- size_t head_id_;
- vector<size_t> tails_ids_; // node ids
- size_t rule_id_;
-
- MSGPACK_DEFINE(head_id_, tails_ids_, rule_id_, score, arity);
-};
-
-struct Node {
- size_t id;
- string symbol;
- short left;
- short right;
- score_t score;
- vector<Edge*> incoming;
- vector<Edge*> outgoing;
- unsigned int mark;
-
- inline bool is_marked() { return mark >= incoming.size(); };
- friend ostream& operator<<(ostream& os, const Node& n);
-
- MSGPACK_DEFINE(id, symbol, left, right, score);
-};
-
-struct Hypergraph {
- list<Node*> nodes;
- vector<Edge*> edges;
- unordered_map<size_t, Node*> nodes_by_id;
- unsigned int arity;
-};
-
-template<typename Semiring> void
-init(const list<Node*>& nodes, const list<Node*>::iterator root, const Semiring& semiring);
-
-void
-reset(const list<Node*> nodes, const vector<Edge*> edges);
-
-void
-topological_sort(list<Node*>& nodes, const list<Node*>::iterator root);
-
-void
-viterbi(Hypergraph& hg);
-
-typedef vector<Edge*> Path;
-
-void
-viterbi_path(Hypergraph& hg, Path& p);
-
-void
-derive(const Path& p, const Node* cur, vector<string>& carry);
-
-namespace io {
-
-void
-read(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab, const string& fn); // FIXME
-
-void
-write(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn); // FIXME
-
-void
-manual(Hypergraph& hg, vector<G::Rule*>& rules);
-
-} // namespace
-
-} // namespace
-
diff --git a/fast/main.cc b/fast/main.cc
deleted file mode 100644
index 825ad59..0000000
--- a/fast/main.cc
+++ /dev/null
@@ -1,27 +0,0 @@
-#include "hypergraph.hh"
-#include <ctime>
-
-
-int
-main(int argc, char** argv)
-{
- Hg::Hypergraph hg;
- G::Vocabulary y;
- G::Grammar g;
- Hg::io::read(hg, g.rules, y, argv[1]);
- //Hg::io::manual(hg, g.rules);
- clock_t begin = clock();
- Hg::Path p;
- Hg::viterbi_path(hg, p);
- vector<string> s;
- Hg::derive(p, p.back()->head, s);
- for (auto it: s)
- cout << it << " ";
- cout << endl;
- clock_t end = clock();
- double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
- cout << elapsed_secs << " s" << endl;
-
- return 0;
-}
-
diff --git a/fast/parse.hh b/fast/parse.hh
deleted file mode 100644
index 33ea9ce..0000000
--- a/fast/parse.hh
+++ /dev/null
@@ -1,108 +0,0 @@
-#pragma once
-
-#include <vector>
-#include <utility>
-#include <sstream>
-#include <unordered_map>
-
-#include "grammar.hh"
-#include "util.hh"
-#include "weaver.hh"
-
-
-using namespace std;
-
-typedef pair<size_t,size_t> Span;
-namespace std {
- template <>
- struct hash<Span>
- {
- size_t
- operator()(Span const& k) const
- {
- return ((hash<size_t>()(k.first)
- ^ (hash<size_t>()(k.second) << 1)) >> 1);
- }
- };
-}
-
-namespace Parse {
-
-void visit(vector<Span>& p,
- size_t i, size_t l, size_t r, size_t x=0)
-{
- for (size_t span = i; span <= r-x; span++) {
- for (size_t k = l; k <= r-span; k++) {
- p.push_back(Span(k,k+span));
- }
- }
-}
-
-struct ChartItem
-{
- Span span;
- G::Rule const* rule;
- vector<ChartItem*> tails;
- size_t dot;
-
- ChartItem(G::Rule* r)
- {
- rule = r;
- dot = 0;
- }
-
- ChartItem(ChartItem const& o)
- {
- span.first = o.span.first;
- span.second = o.span.second;
- rule = o.rule;
- for (auto it: o.tails)
- tails.push_back(it);
- dot = o.dot;
- }
-};
-
-struct Chart
-{
- size_t n_;
- unordered_map<Span, vector<ChartItem*> > m_;
- unordered_map<string,bool> b_;
-
- vector<ChartItem*>& at(Span s)
- {
- return m_[s];
- }
-
- string h(ChartItem* item, Span s)
- {
- ostringstream ss;
- item->rule->lhs->symbol();
- ss << s.first;
- ss << s.second;
-
- return ss.str();
- }
-
- void add(ChartItem* item, Span s)
- {
- m_[s].push_back(item);
- b_[h(item, s)] = true;
- }
-
- Chart(size_t n)
- {
- }
-};
-
-
-void
-init(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g)
-{
- for (auto rule: g.rules) {
- cout << *rule << endl;
- }
-}
-
-
-} //
-
diff --git a/fast/semiring.hh b/fast/semiring.hh
deleted file mode 100644
index 3f4ac08..0000000
--- a/fast/semiring.hh
+++ /dev/null
@@ -1,35 +0,0 @@
-#pragma once
-
-
-namespace Semiring {
-
-template<typename T>
-struct Viterbi {
- T one = 1.0;
- T null = 0.0;
-
- T add(T x, T y);
- T multiply(T x, T y);
- T convert(T x);
-};
-
-template<typename T> T
-Viterbi<T>::add(T x, T y)
-{
- return max(x, y);
-}
-
-template<typename T> T
-Viterbi<T>::multiply(T x, T y)
-{
- return x * y;
-}
-
-template<typename T> T
-Viterbi<T>::convert(T x)
-{
- return (T)x;
-}
-
-} // namespace
-
diff --git a/fast/sparse_vector.hh b/fast/sparse_vector.hh
deleted file mode 100644
index 9d815ff..0000000
--- a/fast/sparse_vector.hh
+++ /dev/null
@@ -1,186 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <sstream>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-#include "util.hh"
-#include "weaver.hh"
-
-using namespace std;
-
-
-namespace Sv {
-
-template<typename K, typename V>
-struct SparseVector {
- unordered_map<K,V> m_;
- V zero = 0.f;
-
- SparseVector() {};
-
- SparseVector(string& s)
- {
- from_s(this, s);
- };
-
- void
- insert(K k, V v) { m_[k] = v; };
-
- V
- dot(SparseVector& other)
- {
- V r;
- unordered_map<K,V>* o = &m_;
- auto b = m_.cbegin();
- auto e = m_.cend();
- if (other.size() < size()) {
- b = other.m_.cbegin();
- e = other.m_.cend();
- o = &other.m_;
- }
- for (auto it = b; it != e; it++)
- r += it->second * o->at(it->first);
-
- return r;
- };
-
- size_t
- size()
- {
- return m_.size();
- }
-
- V&
- operator[](const K& k)
- {
- return at(k);
- };
-
- const V&
- at(const K& k) const
- {
- if (m_.find(k) == m_.end())
- return zero;
- else
- return m_.at(k);
- }
-
- SparseVector
- operator+(const SparseVector& other) const
- {
- SparseVector<K,V> v;
- v.m_.insert(m_.cbegin(), m_.cend());
- v.m_.insert(other.m_.cbegin(), other.m_.cend());
- for (const auto it: v.m_)
- v.m_[it.first] = this->at(it.first) + other.at(it.first);
-
- return v;
- };
-
- SparseVector&
- operator+=(const SparseVector& other)
- {
- for (const auto it: other.m_)
- m_[it.first] += it.second;
-
- return *this;
- };
-
- SparseVector
- operator-(const SparseVector& other) const
- {
- SparseVector<K,V> v;
- v.m_.insert(m_.cbegin(), m_.cend());
- v.m_.insert(other.m_.cbegin(), other.m_.cend());
- for (const auto it: v.m_)
- v.m_[it.first] = this->at(it.first) - other.at(it.first);
-
- return v;
- };
-
- SparseVector&
- operator-=(const SparseVector& other)
- {
- for (const auto it: other.m_)
- m_[it.first] -= it.second;
-
- return *this;
- };
-
- SparseVector
- operator*(V f) const
- {
- SparseVector<K,V> v;
- for (const auto it: m_)
- v.m_[it.first] = this->at(it.first) * f;
-
- return v;
- };
-
- SparseVector&
- operator*=(V f)
- {
- for (const auto it: m_)
- m_[it.first] *= f;
-
- return *this;
- };
-
- static void
- from_s(SparseVector* w, const string& s)
- {
- stringstream ss(s);
- while (!ss.eof()) {
- string t;
- ss >> t;
- size_t eq = t.find_first_of("=");
- if (eq == string::npos) {
- return;
- }
- t.replace(eq, 1, " ");
- stringstream tt(t);
- K k; V v;
- tt >> k >> v;
- w->m_.emplace(k.substr(k.find_first_of("\"")+1, k.find_last_of("\"")-1), v);
- }
- }
-
- ostream&
- repr(ostream& os) const
- {
- os << "SparseVector<{";
- for (auto it = m_.cbegin(); it != m_.cend(); it++) {
- os << "'" << it->first << "'=" << it->second;
- if (next(it) != m_.end())
- os << ", ";
- }
- os << "}>";
-
- return os;
- };
-
- ostream&
- escaped(ostream& os, bool quote_keys=false) const {
- for (auto it = m_.cbegin(); it != m_.cend(); it++) {
- if (quote_keys) os << '"';
- os << util::json_escape(it->first);
- if (quote_keys) os << '"';
- os << "=" << it->second;
- if (next(it) != m_.cend()) os << " ";
- }
-
- return os;
- };
-
- friend ostream&
- operator<<(ostream& os, const SparseVector& v)
- {
- return v.repr(os);
- }
-};
-
-} // namespace
-
diff --git a/fast/test/Makefile b/fast/test/Makefile
deleted file mode 100644
index 65e97ef..0000000
--- a/fast/test/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-COMPILER=g++
-CFLAGS=-std=c++11 -O3 -I../
-TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread
-
-
-all: test_grammar test_sparse_vector test_parse
-
-test_grammar: test_grammar.cc ../grammar.hh
- $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_grammar.cc -o test_grammar
-
-test_sparse_vector: test_sparse_vector.cc ../sparse_vector.hh
- $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_sparse_vector.cc -o test_sparse_vector
-
-test_parse: test_parse.cc ../parse.hh ../grammar.hh ../util.hh
- $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_parse.cc -o test_parse
-
-clean:
- rm -f test_grammar test_sparse_vector
-
diff --git a/fast/test/test_grammar b/fast/test/test_grammar
deleted file mode 100755
index 6cf7ad5..0000000
--- a/fast/test/test_grammar
+++ /dev/null
Binary files differ
diff --git a/fast/test/test_grammar.cc b/fast/test/test_grammar.cc
deleted file mode 100644
index bbe76e7..0000000
--- a/fast/test/test_grammar.cc
+++ /dev/null
@@ -1,20 +0,0 @@
-#include <fstream>
-
-#include "grammar.hh"
-
-using namespace std;
-
-
-int
-main(int argc, char** argv)
-{
- G::Vocabulary y;
- G::Grammar g(argv[1], y);
- for (auto it: g.rules) {
- it->escaped(cout);
- cout << endl;
- }
-
- return 0;
-}
-
diff --git a/fast/test/test_sparse_vector b/fast/test/test_sparse_vector
deleted file mode 100755
index c06fe9e..0000000
--- a/fast/test/test_sparse_vector
+++ /dev/null
Binary files differ
diff --git a/fast/test/test_sparse_vector.cc b/fast/test/test_sparse_vector.cc
deleted file mode 100644
index 426bed1..0000000
--- a/fast/test/test_sparse_vector.cc
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "sparse_vector.hh"
-
-
-int
-main(void)
-{
- Sv::SparseVector<string, score_t> a;
- a.insert("1", 1);
- a.insert("2", 2);
- cout << "a:" << a << endl;
-
- Sv::SparseVector<string, score_t> b;
- b.insert("2", 2);
- cout << "b:" << b << endl;
-
- Sv::SparseVector<string, score_t> c = a + b;
- cout << "a+b:" << c << endl;
-
- a += b;
- cout << "a+=b:" << a << endl;
-
- a -= b;
- cout << "a-=b:" << a << endl;
-
- cout << "a*2:" << a*2 << endl;
-
- a *= 2;
- cout << "a*=2:" << a << endl;
-
- string s("\"a\"=2 \"b\"=3");
- Sv::SparseVector<string, score_t>* sv = new Sv::SparseVector<string, score_t>(s);
- cout << *sv << endl;
- cout << sv->dot(*sv) << endl;
-
- return 0;
-}
-
diff --git a/fast/util.hh b/fast/util.hh
deleted file mode 100644
index 9ce19da..0000000
--- a/fast/util.hh
+++ /dev/null
@@ -1,47 +0,0 @@
-#pragma once
-
-#include <string>
-
-#include "weaver.hh"
-
-using namespace std;
-
-
-namespace util {
-
-inline string
-json_escape(const string& s)
-{
- ostringstream os;
- for (auto it = s.cbegin(); it != s.cend(); it++) {
- switch (*it) {
- case '"': os << "\\\""; break;
- case '\\': os << "\\\\"; break;
- case '\b': os << "\\b"; break;
- case '\f': os << "\\f"; break;
- case '\n': os << "\\n"; break;
- case '\r': os << "\\r"; break;
- case '\t': os << "\\t"; break;
- default: os << *it; break;
- }
- }
-
- return os.str();
-}
-
-inline vector<symbol_t>
-tokenize(string s)
-{
- istringstream ss(s);
- vector<symbol_t> r;
- while (ss.good()) {
- string buf;
- ss >> buf;
- r.push_back(buf);
- }
-
- return r;
-}
-
-} // namespace util
-
diff --git a/fast/weaver.hh b/fast/weaver.hh
deleted file mode 100644
index e89b4dd..0000000
--- a/fast/weaver.hh
+++ /dev/null
@@ -1,10 +0,0 @@
-#pragma once
-
-#include <string>
-
-using namespace std;
-
-
-typedef double score_t;
-typedef string symbol_t;
-