From 0b3cdb4ae2fa176ba74a48ff7a1616395079c151 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 5 Aug 2014 22:46:43 +0200
Subject: too much to tell
---
fast/Makefile | 9 +-
fast/README.md | 2 +
fast/dummyvector.h | 28 ------
fast/grammar.cc | 243 ++++++++++++++++++++++++---------------------
fast/grammar.hh | 58 +++++++----
fast/hypergraph.cc | 147 ++++++++++++++++++++++-----
fast/hypergraph.hh | 45 +++++----
fast/main.cc | 14 ++-
fast/semiring.hh | 1 +
fast/sparse_vector.hh | 106 +++++++++++++++-----
fast/test_grammar.cc | 4 +-
fast/test_sparse_vector.cc | 11 +-
fast/util.hh | 29 ++++++
fast/weaver.hh | 4 +
hg.rb | 2 +-
test/test_hg.rb | 2 +-
util/cdec2json.py | 30 ++++--
17 files changed, 476 insertions(+), 259 deletions(-)
delete mode 100644 fast/dummyvector.h
create mode 100644 fast/util.hh
create mode 100644 fast/weaver.hh
diff --git a/fast/Makefile b/fast/Makefile
index 6d05fea..40ce0eb 100644
--- a/fast/Makefile
+++ b/fast/Makefile
@@ -2,17 +2,18 @@ COMPILER=clang
CFLAGS=-std=c++11 -O3
-all: hypergraph.o main.cc
+all: grammar.o hypergraph.o main.cc
$(COMPILER) $(CFLAGS) -std=c++11 -lstdc++ -lm -lmsgpack grammar.o hypergraph.o main.cc -o fast_weaver
-test: test_grammar test_sparse_vector
-hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh
+hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh sparse_vector.hh weaver.hh
$(COMPILER) $(CFLAGS) -g -c hypergraph.cc
-grammar.o: grammar.cc grammar.hh
+grammar.o: grammar.cc grammar.hh sparse_vector.hh util.hh
$(COMPILER) $(CFLAGS) -g -c grammar.cc
+test: test_grammar test_sparse_vector
+
test_grammar: test_grammar.cc grammar.o
$(COMPILER) $(CFLAGS) -lstdc++ -lm grammar.o test_grammar.cc -o test_grammar
diff --git a/fast/README.md b/fast/README.md
index 541f93f..a11bd85 100644
--- a/fast/README.md
+++ b/fast/README.md
@@ -30,3 +30,5 @@ http://bytes.com/topic/c/answers/702569-blas-vs-cblas-c
http://www.netlib.org/lapack/#_standard_c_language_apis_for_lapack
http://www.osl.iu.edu/research/mtl/download.php3
http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library
+
+http://goog-perftools.sourceforge.net/doc/tcmalloc.html
diff --git a/fast/dummyvector.h b/fast/dummyvector.h
deleted file mode 100644
index 18e2121..0000000
--- a/fast/dummyvector.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#pragma once
-
-#include
-
-
-struct DummyVector {
- double CountEF;
- double EgivenFCoherent;
- double Glue;
- double IsSingletonF;
- double IsSingletonFE;
- double LanguageModel;
- double LanguageModel_OOV;
- double MaxLexFgivenE;
- double MaxLexEgivenF;
- double PassThrough;
- double PassThrough_1;
- double PassThrough_2;
- double PassThrough_3;
- double PassThrough_4;
- double PassThrough_5;
- double PassThrough_6;
- double SampleCountF;
- double WordPenalty;
-
- MSGPACK_DEFINE(CountEF, EgivenFCoherent, Glue, IsSingletonF, IsSingletonFE, LanguageModel, LanguageModel_OOV, MaxLexEgivenF, MaxLexFgivenE, PassThrough, PassThrough_1, PassThrough_2, PassThrough_3, PassThrough_4, PassThrough_5, PassThrough_6, SampleCountF, WordPenalty);
-};
-
diff --git a/fast/grammar.cc b/fast/grammar.cc
index 7f2d506..558f6e6 100644
--- a/fast/grammar.cc
+++ b/fast/grammar.cc
@@ -1,170 +1,165 @@
#include "grammar.hh"
-string
-esc_str(const string& s) { // FIXME
- ostringstream os;
- for (auto it = s.cbegin(); it != s.cend(); it++) {
- switch (*it) {
- case '"': os << "\\\""; break;
- case '\\': os << "\\\\"; break;
- case '\b': os << "\\b"; break;
- case '\f': os << "\\f"; break;
- case '\n': os << "\\n"; break;
- case '\r': os << "\\r"; break;
- case '\t': os << "\\t"; break;
- default: os << *it; break;
- }
- }
-
- return os.str();
-}
-
namespace G {
+/*
+ * G::NT
+ *
+ */
NT::NT(string& s)
{
- s.erase(0, 1);
- s.pop_back();
+ s.erase(0, 1); s.pop_back(); // remove '[' and ']'
stringstream ss(s);
string buf;
- size_t c = 0;
- index = 0;
+ size_t j = 0;
+ index = 0; // default
while (ss.good() && getline(ss, buf, ',')) {
- if (c == 0) {
+ if (j == 0) {
symbol = buf;
} else {
index = stoi(buf);
}
- c++;
+ j++;
}
}
-T::T(string& s)
+string
+NT::repr() const
{
- word = s;
+ ostringstream os;
+ os << "NT<" << symbol << "," << index << ">";
+
+ return os.str();
}
-Item::Item(string& s)
+string
+NT::escaped() const
{
- if (s.front() == '[' && s.back() == ']') {
- type = NON_TERMINAL;
- nt = new NT(s);
- } else {
- type = TERMINAL;
- t = new T(s);
- }
+ ostringstream os;
+ os << "[" << symbol;
+ if (index > 0)
+ os << "," << index;
+ os << "]";
+
+ return os.str();
}
-Rule::Rule(string& s)
+ostream&
+operator<<(ostream& os, const NT& nt)
{
- stringstream ss(s);
- size_t c = 0;
- string buf;
- while (ss >> buf) {
- if (buf == "|||") { c++; continue; }
- if (c == 0) { // LHS
- lhs = new NT(buf);
- } else if (c == 1) { // RHS
- rhs.push_back(new Item(buf));
- if (rhs.back()->type == NON_TERMINAL) arity++;
- } else if (c == 2) { // TARGET
- target.push_back(new Item(buf));
- } else if (c == 3) { // F TODO
- } else if (c == 4) { // A TODO
- } else { // ERROR FIXME
- }
- if (c == 4) break;
- }
- arity = 0;
+ return os << nt.repr();
}
-Grammar::Grammar(string fn)
+/*
+ * G::T
+ *
+ */
+T::T(const string& s)
{
- ifstream ifs(fn);
- string line;
- while (getline(ifs, line)) {
- G::Rule* r = new G::Rule(line);
- rules.push_back(r);
- if (r->arity == 0)
- flat.push_back(r);
- else if (r->rhs.front()->type == NON_TERMINAL)
- start_nt.push_back(r);
- else
- start_t.push_back(r);
- }
+ word = s;
}
string
-Item::repr() const
+T::repr() const
{
ostringstream os;
- if (type == TERMINAL)
- os << t->repr();
- else
- os << nt->repr();
+ os << "T<" << word << ">";
return os.str();
}
string
-Item::escaped() const
+T::escaped() const
{
- ostringstream os;
- if (type == TERMINAL)
- os << t->escaped();
- else
- os << nt->escaped();
-
- return os.str();
+ return util::json_escape(word);
}
ostream&
-operator<<(ostream& os, const Item& i)
+operator<<(ostream& os, const T& t)
{
- return os << i.repr();
+ return os << t.repr();
}
-string
-NT::repr() const
-{
- ostringstream os;
- os << "NT<" << symbol << "," << index << ">";
- return os.str();
+/*
+ * G::Item
+ *
+ * Better solve this by inheritance
+ * -> rhs, target as vector ?
+ *
+ */
+Item::Item(string& s)
+{
+ if (s.front() == '[' && s.back() == ']') {
+ type = NON_TERMINAL;
+ nt = new NT(s);
+ } else {
+ type = TERMINAL;
+ t = new T(s);
+ }
}
string
-NT::escaped() const
+Item::repr() const
{
ostringstream os;
- os << "[" << symbol;
- if (index > 0)
- os << "," << index;
- os << "]";
+ if (type == TERMINAL)
+ os << t->repr();
+ else
+ os << nt->repr();
return os.str();
}
-ostream&
-operator<<(ostream& os, const NT& nt)
-{
- return os << nt.repr();
-}
-
string
-T::repr() const
+Item::escaped() const
{
ostringstream os;
- os << "T<" << word << ">";
+ if (type == TERMINAL)
+ os << t->escaped();
+ else
+ os << nt->escaped();
return os.str();
}
ostream&
-operator<<(ostream& os, const T& t)
+operator<<(ostream& os, const Item& i)
{
- return os << t.repr();
+ return os << i.repr();
+}
+
+/*
+ * G::Rule
+ *
+ */
+Rule::Rule(const string& s)
+{
+ stringstream ss(s);
+ size_t j = 0;
+ string buf;
+ arity = 0;
+ size_t index = 1;
+ while (ss >> buf) {
+ if (buf == "|||") { j++; continue; }
+ if (j == 0) { // LHS
+ lhs = new NT(buf);
+ } else if (j == 1) { // RHS
+ rhs.push_back(new Item(buf));
+ if (rhs.back()->type == NON_TERMINAL) arity++;
+ } else if (j == 2) { // TARGET
+ target.push_back(new Item(buf));
+ if (target.back()->type == NON_TERMINAL) {
+ order.insert(make_pair(index, target.back()->nt->index));
+ index++;
+ }
+ } else if (j == 3) { // F TODO
+ } else if (j == 4) { // A TODO
+ } else { // ERROR
+ }
+ if (j == 4) break;
+ }
}
string
@@ -183,7 +178,7 @@ Rule::repr() const
if (next(it) != target.end()) os << " ";
}
os << "}" \
- ", f:" << "TODO" << \
+ ", f:" << f->repr() << \
", arity=" << arity << \
", map:" << "TODO" << \
">";
@@ -191,12 +186,6 @@ Rule::repr() const
return os.str();
}
-ostream&
-operator<<(ostream& os, const Rule& r)
-{
- return os << r.repr();
-}
-
string
Rule::escaped() const
{
@@ -212,18 +201,44 @@ Rule::escaped() const
if (next(it) != target.end()) os << " ";
}
os << " ||| ";
- os << "TODO";
+ os << f->escaped();
os << " ||| ";
os << "TODO";
return os.str();
}
+ostream&
+operator<<(ostream& os, const Rule& r)
+{
+ return os << r.repr();
+}
+
+/*
+ * G::Grammar
+ *
+ */
+Grammar::Grammar(const string& fn)
+{
+ ifstream ifs(fn);
+ string line;
+ while (getline(ifs, line)) {
+ G::Rule* r = new G::Rule(line);
+ rules.push_back(r);
+ if (r->arity == 0)
+ flat.push_back(r);
+ else if (r->rhs.front()->type == NON_TERMINAL)
+ start_nt.push_back(r);
+ else
+ start_t.push_back(r);
+ }
+}
+
ostream&
operator<<(ostream& os, const Grammar& g)
{
- for (auto it = g.rules.begin(); it != g.rules.end(); it++)
- os << (**it).repr() << endl;
+ for (const auto it: g.rules)
+ os << it->repr() << endl;
return os;
}
diff --git a/fast/grammar.hh b/fast/grammar.hh
index 51501cf..48a5116 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -1,38 +1,42 @@
#pragma once
+#include
#include
-#include
#include
-#include
-#include
+#include
#include