From 190f68c880eb27506669e95e2bc0493e2ec42c4c Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sun, 17 Aug 2014 07:51:16 +0100
Subject: functional again
---
fast/Makefile | 3 +--
fast/README.md | 9 ++++++---
fast/grammar.cc | 46 ++++++++++++++++++++++++++++++++++++----------
fast/grammar.hh | 1 +
fast/hypergraph.cc | 28 +++++++++++++++++++---------
fast/hypergraph.hh | 2 +-
fast/main.cc | 11 +++++++----
fast/sparse_vector.hh | 38 +++++++++++++++++++++++++-------------
8 files changed, 96 insertions(+), 42 deletions(-)
(limited to 'fast')
diff --git a/fast/Makefile b/fast/Makefile
index 40ce0eb..9e88076 100644
--- a/fast/Makefile
+++ b/fast/Makefile
@@ -1,11 +1,10 @@
-COMPILER=clang
+COMPILER=g++
CFLAGS=-std=c++11 -O3
all: grammar.o hypergraph.o main.cc
$(COMPILER) $(CFLAGS) -std=c++11 -lstdc++ -lm -lmsgpack grammar.o hypergraph.o main.cc -o fast_weaver
-
hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh sparse_vector.hh weaver.hh
$(COMPILER) $(CFLAGS) -g -c hypergraph.cc
diff --git a/fast/README.md b/fast/README.md
index a11bd85..1d6bd04 100644
--- a/fast/README.md
+++ b/fast/README.md
@@ -7,12 +7,12 @@ TODO
* other semirings
* include language model
* compress/hash words/feature strings?
-
+ * cast? Rule -> Edge, ChartItem -> Node
+ * feature factory, observer
Dependencies:
* MessagePack for object serialization [1]
* kenlm language model [2]
-
This is Linux only.
@@ -20,6 +20,8 @@ This is Linux only.
[1] http://msgpack.org
[2] http://kheafield.com/code/kenlm/
+
+stuff to have a look at:
http://math.nist.gov/spblas/
http://lapackpp.sourceforge.net/
http://www.cvmlib.com/
@@ -30,5 +32,6 @@ http://bytes.com/topic/c/answers/702569-blas-vs-cblas-c
http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
http://www.osl.iu.edu/research/mtl/download.php3
http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library
-
+https://software.intel.com/en-us/tbb_4.2_doc
http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+
diff --git a/fast/grammar.cc b/fast/grammar.cc
index 558f6e6..a003eb4 100644
--- a/fast/grammar.cc
+++ b/fast/grammar.cc
@@ -10,7 +10,18 @@ namespace G {
NT::NT(string& s)
{
s.erase(0, 1); s.pop_back(); // remove '[' and ']'
- stringstream ss(s);
+ istringstream ss(s);
+ if (ss >> index) { // [i]
+ symbol = "";
+ index = stoi(s);
+
+ return;
+ } else { // [X]
+ symbol = s;
+ index = 0;
+
+ return;
+ }
string buf;
size_t j = 0;
index = 0; // default
@@ -135,28 +146,43 @@ operator<<(ostream& os, const Item& i)
*
*/
Rule::Rule(const string& s)
+{
+ from_s(this, s); // delegate all parsing to the static from_s helper, which fills in the Rule it is given
+}
+
+void
+Rule::from_s(Rule* r, const string& s)
{
stringstream ss(s);
size_t j = 0;
string buf;
- arity = 0;
+ r->arity = 0;
size_t index = 1;
+ vector rhs_nt;
+ r->f = new Sv::SparseVector();
while (ss >> buf) {
if (buf == "|||") { j++; continue; }
if (j == 0) { // LHS
- lhs = new NT(buf);
+ r->lhs = new NT(buf);
} else if (j == 1) { // RHS
- rhs.push_back(new Item(buf));
- if (rhs.back()->type == NON_TERMINAL) arity++;
+ r->rhs.push_back(new Item(buf));
+ if (r->rhs.back()->type == NON_TERMINAL) {
+ rhs_nt.push_back(r->rhs.back()->nt);
+ r->arity++;
+ }
} else if (j == 2) { // TARGET
- target.push_back(new Item(buf));
- if (target.back()->type == NON_TERMINAL) {
- order.insert(make_pair(index, target.back()->nt->index));
+ r->target.push_back(new Item(buf));
+ if (r->target.back()->type == NON_TERMINAL) {
+ r->order.insert(make_pair(index, r->target.back()->nt->index));
+ if (r->target.back()->nt->symbol == "")
+ r->target.back()->nt->symbol = rhs_nt[r->target.back()->nt->index-1]->symbol;
index++;
}
} else if (j == 3) { // F TODO
+ Sv::SparseVector::from_s(r->f, buf); // FIXME this is slow!!!
} else if (j == 4) { // A TODO
- } else { // ERROR
+ } else {
+ // ERROR
}
if (j == 4) break;
}
@@ -203,7 +229,7 @@ Rule::escaped() const
os << " ||| ";
os << f->escaped();
os << " ||| ";
- os << "TODO";
+ os << "TODO(alignment)";
return os.str();
}
diff --git a/fast/grammar.hh b/fast/grammar.hh
index 48a5116..1b9ac5a 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -69,6 +69,7 @@ Sv::SparseVector* f;
Rule() {};
Rule(const string& s);
+ static void from_s(Rule* r, const string& s);
string repr() const;
string escaped() const;
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index e1debb1..a9a44f9 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -73,7 +73,7 @@ viterbi_path(Hypergraph& hg, Path& p)
find_if(hg.nodes.begin(), hg.nodes.end(), \
[](Node* n) { return n->incoming.size() == 0; });
- Hg::topological_sort(hg.nodes, root);
+ Hg::topological_sort(hg.nodes, root); // FIXME do I need to do this when reading from file?
Semiring::Viterbi semiring;
Hg::init(hg.nodes, root, semiring);
@@ -107,7 +107,8 @@ derive(const Path& p, const Node* cur, vector& carry)
it->head->right == cur->right) {
next = it;
}
- }
+ } // FIXME this is probably not so good
+
unsigned j = 0;
for (auto it: next->rule->target) {
if (it->type == G::NON_TERMINAL) {
@@ -125,7 +126,7 @@ void
read(Hypergraph& hg, vector& rules, const string& fn) // FIXME
{
ifstream ifs(fn);
- size_t i = 0, nr, nn, ne;
+ size_t i = 0, r, n, e;
msgpack::unpacker pac;
while(true) {
pac.reserve_buffer(32*1024);
@@ -135,17 +136,23 @@ read(Hypergraph& hg, vector& rules, const string& fn) // FIXME
while(pac.next(&result)) {
msgpack::object o = result.get();
if (i == 0) {
- o.convert(&nn);
- nn += 1;
+ o.convert(&r);
} else if (i == 1) {
- o.convert(&ne);
- ne += 1;
- } else if (i > 1 && i <= nn) {
+ o.convert(&n);
+ } else if (i == 2) {
+ o.convert(&e);
+ } else if (i > 2 && i <= r+2) {
+ string s;
+ o.convert(&s);
+ G::Rule* rule = new G::Rule;
+ G::Rule::from_s(rule, s);
+ rules.push_back(rule);
+ } else if (i > r+2 && i <= r+n+2) {
Node* n = new Node;
o.convert(n);
hg.nodes.push_back(n);
hg.nodes_by_id[n->id] = n;
- } else if (i > nn && i <= nn+ne+1) {
+ } else if (i > n+2 && i <= r+n+e+2) {
Edge* e = new Edge;
e->arity = 0;
o.convert(e);
@@ -158,6 +165,9 @@ read(Hypergraph& hg, vector& rules, const string& fn) // FIXME
e->tails.push_back(hg.nodes_by_id[*it]);
e->arity++;
}
+ e->rule = rules[e->rule_id_];
+ } else {
+ // ERROR
}
i++;
}
diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh
index 699bfdf..299a62d 100644
--- a/fast/hypergraph.hh
+++ b/fast/hypergraph.hh
@@ -92,7 +92,7 @@ void
read(Hypergraph& hg, vector& rules, const string& fn); // FIXME
void
-write(Hypergraph& hg, vector& rules, const string& fn); // TODO
+write(Hypergraph& hg, vector& rules, const string& fn); // FIXME
void
manual(Hypergraph& hg, vector& rules);
diff --git a/fast/main.cc b/fast/main.cc
index 59e25d5..08fcfcf 100644
--- a/fast/main.cc
+++ b/fast/main.cc
@@ -1,4 +1,5 @@
#include "hypergraph.hh"
+#include
int
@@ -6,9 +7,9 @@ main(int argc, char** argv)
{
Hg::Hypergraph hg;
G::Grammar g;
-//Hg::io::read(hg, g.rules, argv[1]);
- Hg::io::manual(hg, g.rules);
-
+ Hg::io::read(hg, g.rules, argv[1]);
+ //Hg::io::manual(hg, g.rules);
+ clock_t begin = clock();
Hg::Path p;
Hg::viterbi_path(hg, p);
vector s;
@@ -16,7 +17,9 @@ main(int argc, char** argv)
for (auto it: s)
cout << it << " ";
cout << endl;
-
+ clock_t end = clock();
+ double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
+ cout << elapsed_secs << " s" << endl;
return 0;
}
diff --git a/fast/sparse_vector.hh b/fast/sparse_vector.hh
index e497769..3583240 100644
--- a/fast/sparse_vector.hh
+++ b/fast/sparse_vector.hh
@@ -22,17 +22,7 @@ struct SparseVector {
SparseVector() {};
SparseVector(string& s)
{
- stringstream ss(s);
- while (!ss.eof()) {
- string t;
- ss >> t;
- size_t eq = t.find_first_of("=");
- t.replace(eq, 1, " ");
- stringstream tt(t);
- K k; V v;
- tt >> k >> v;
- m_.emplace(k.substr(k.find_first_of("\"")+1, k.find_last_of("\"")-1), v);
- }
+ from_s(this, s);
};
void
@@ -138,6 +128,25 @@ struct SparseVector {
return *this;
};
+ static void // Parses whitespace-separated key=value tokens from s and emplaces each pair into w->m_.
+ from_s(SparseVector* w, const string& s)
+ {
+ stringstream ss(s);
+ while (!ss.eof()) {
+ string t;
+ ss >> t; // next whitespace-delimited token; t stays empty if the extraction fails
+ size_t eq = t.find_first_of("=");
+ if (eq == string::npos) { // token without '=' (this includes an empty token): stop parsing, keeping what was read so far
+ return;
+ }
+ t.replace(eq, 1, " "); // turn the first '=' into a space so key and value can be pulled apart with >>
+ stringstream tt(t);
+ K k; V v;
+ tt >> k >> v;
+ w->m_.emplace(k.substr(k.find_first_of("\"")+1, k.find_last_of("\"")-1), v); // NOTE(review): substr's second argument is a count, not an end index; this strips the surrounding quotes only when the opening quote is the first character of k -- confirm that always holds
+ }
+ }
+
string
repr() const
{
@@ -154,10 +163,13 @@ struct SparseVector {
};
string
- escaped() const {
+ escaped(bool quote_keys=false) const {
ostringstream os;
for (auto it = m_.cbegin(); it != m_.cend(); it++) {
- os << '"' << util::json_escape(it->first) << '"' << "=" << it->second;
+ if (quote_keys) os << '"';
+ os << util::json_escape(it->first);
+ if (quote_keys) os << '"';
+ os << "=" << it->second;
if (next(it) != m_.cend()) os << " ";
}
--
cgit v1.2.3