summary | refs | log | tree | commit | diff
path: root/fast
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-08-24 17:26:05 +0100
committer Patrick Simianer <p@simianer.de> 2014-08-24 17:26:05 +0100
commit f1916c39b820b7d10d1ae7d7447675c4224d8197 (patch)
tree 7d3673d7784e7b3adbb6ca5b86e31058176bd376 /fast
parent cef65063cec641a93973b38a48e100fdd115db44 (diff)
fixes
Diffstat (limited to 'fast')
-rw-r--r-- fast/grammar.hh 51
-rw-r--r-- fast/hypergraph.cc 6
-rw-r--r-- fast/parse.cc 55
-rw-r--r-- fast/parse.hh 11
-rw-r--r-- fast/test/Makefile 5
-rwxr-xr-x fast/test/test_grammar bin 56832 -> 60943 bytes
-rw-r--r-- fast/util.hh 19
-rw-r--r-- fast/weaver.hh 5
8 files changed, 64 insertions, 88 deletions
diff --git a/fast/grammar.hh b/fast/grammar.hh
index e5acb8a..4906c46 100644
--- a/fast/grammar.hh
+++ b/fast/grammar.hh
@@ -49,32 +49,29 @@ struct NT : public Item {
index_ = 0; // default
string t(s);
t.erase(0, 1); t.pop_back(); // remove '[' and ']'
- istringstream ss(s);
+ istringstream ss(t);
if (ss >> index_) { // [i]
symbol_ = "";
index_ = stoi(s);
-
return;
- } else { // [X]
- symbol_ = s;
-
- return;
- }
- string buf;
- size_t j = 0;
- while (ss.good() && getline(ss, buf, ',')) {
- if (j == 0) {
- symbol_ = buf;
- } else {
- index_ = stoi(buf);
+ } else {
+ ss.clear();
+ string buf;
+ size_t j = 0;
+ while (ss.good() && getline(ss, buf, ',')) {
+ if (j == 0) {
+ symbol_ = buf;
+ } else {
+ index_ = stoi(buf);
+ }
+ j++;
}
- j++;
}
}
virtual size_t index() const { return index_; }
virtual symbol_t symbol() const { return symbol_; }
- virtual item_type type() { return NON_TERMINAL; }
+ virtual item_type type() const { return NON_TERMINAL; }
virtual ostream&
repr(ostream& os) const
@@ -103,7 +100,7 @@ struct T : public Item {
}
virtual symbol_t symbol() const { return symbol_; }
- virtual item_type type() { return TERMINAL; }
+ virtual item_type type() const { return TERMINAL; }
virtual ostream&
repr(ostream& os) const
@@ -169,7 +166,7 @@ Sv::SparseVector<string, score_t>* f;
{
istringstream ss(s);
string buf;
- size_t j = 0, i = 0;
+ size_t j = 0, i = 1;
r->arity = 0;
vector<NT*> rhs_non_terminals;
r->f = new Sv::SparseVector<string, score_t>();
@@ -187,7 +184,7 @@ Sv::SparseVector<string, score_t>* f;
} else if (j == 2) { // target
Item* item = vocab.get(buf);
if (item->type() == NON_TERMINAL) {
- r->order[i] = item->index();
+ r->order.insert(make_pair(i, item->index()));
i++;
if (item->symbol() == "") { // only [1], [2] ... on target
reinterpret_cast<NT*>(item)->symbol_ = \
@@ -223,9 +220,13 @@ Sv::SparseVector<string, score_t>* f;
}
os << "}, f:";
f->repr(os);
- os << ", arity=" << arity << \
- ", map:" << "TODO" << \
- ">";
+ os << ", arity=" << arity << \
+ ", order:{";
+ for (auto it = order.begin(); it != order.end(); it++) {
+ os << it->first << "->" << it->second;
+ if (next(it) != order.end()) os << ", ";
+ }
+ os << "}>";
return os;
}
@@ -246,14 +247,14 @@ Sv::SparseVector<string, score_t>* f;
}
os << " ||| ";
f->escaped(os);
- os << " ||| ";
- os << "TODO";
+ os << " ||| " << \
+ "TODO";
return os;
};
friend ostream&
- operator<<(ostream& os, const Rule& r)
+ operator<<(ostream& os, Rule const& r)
{
return r.repr(os);
};
diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc
index d9a51a5..2b33ff4 100644
--- a/fast/hypergraph.cc
+++ b/fast/hypergraph.cc
@@ -69,12 +69,12 @@ viterbi(Hypergraph& hg)
void
viterbi_path(Hypergraph& hg, Path& p)
{
- //list<Node*>::iterator root = \
+ list<Node*>::iterator root = \
find_if(hg.nodes.begin(), hg.nodes.end(), \
[](Node* n) { return n->incoming.size() == 0; });
- list<Node*>::iterator root = hg.nodes.begin();
+ //list<Node*>::iterator root = hg.nodes.begin();
- //Hg::topological_sort(hg.nodes, root);
+ Hg::topological_sort(hg.nodes, root);
// ^^^ FIXME do I need to do this when reading from file?
Semiring::Viterbi<score_t> semiring;
Hg::init(hg.nodes, root, semiring);
diff --git a/fast/parse.cc b/fast/parse.cc
deleted file mode 100644
index 06c9fa0..0000000
--- a/fast/parse.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-#include "parse.hh"
-
-
-namespace Parse {
-
-
-} //
-
-
-vector<G::T> tokenize(string s)
-{
- istringstream ss(s);
- vector<G::T> res;
- while (ss.good()) {
- string t;
- ss >> t;
- G::T i(t);
- cout << i.word << endl;
- res.push_back(i);
- }
- return res;
-}
-
-
-bool operator==(vector<G::Item> const& a, vector<G::Item> const& b)
-{
- if (a.size() != b.size()) return false;
- for (auto it: a)
-}
-
-int main(int argc, char** argv)
-{
- string in("karten haie");
- vector<G::T> tok = tokenize(in);
- for (auto it: tok)
- cout << it.word << ",";
- cout << endl;
- size_t n = tok.size();
-
- G::Grammar g(argv[1]);
-
- vector<Span> spans;
- Parse::visit(spans, 1, 0, 6);
- for (auto it: spans) {
- cout << "(" << it.first << "," << it.second << ")" << endl;
- }
-
- Parse::Chart active(n);
- Parse::Chart passive(n);
-
- //init(tok, n, active, passive, g);
-
- cout << *(g.flat.at(0)) << endl;
-}
-
diff --git a/fast/parse.hh b/fast/parse.hh
index 9fbcdea..33ea9ce 100644
--- a/fast/parse.hh
+++ b/fast/parse.hh
@@ -6,6 +6,8 @@
#include <unordered_map>
#include "grammar.hh"
+#include "util.hh"
+#include "weaver.hh"
using namespace std;
@@ -74,9 +76,10 @@ struct Chart
string h(ChartItem* item, Span s)
{
ostringstream ss;
- ss << item->rule->lhs->symbol;
+ item->rule->lhs->symbol();
ss << s.first;
ss << s.second;
+
return ss.str();
}
@@ -92,9 +95,11 @@ struct Chart
};
-void init(vector<G::T> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g)
+void
+init(vector<symbol_t> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g)
{
- for (auto rule: g.flat) {
+ for (auto rule: g.rules) {
+ cout << *rule << endl;
}
}
diff --git a/fast/test/Makefile b/fast/test/Makefile
index 0140f63..65e97ef 100644
--- a/fast/test/Makefile
+++ b/fast/test/Makefile
@@ -3,7 +3,7 @@ CFLAGS=-std=c++11 -O3 -I../
TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread
-all: test_grammar test_sparse_vector
+all: test_grammar test_sparse_vector test_parse
test_grammar: test_grammar.cc ../grammar.hh
$(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_grammar.cc -o test_grammar
@@ -11,6 +11,9 @@ test_grammar: test_grammar.cc ../grammar.hh
test_sparse_vector: test_sparse_vector.cc ../sparse_vector.hh
$(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_sparse_vector.cc -o test_sparse_vector
+test_parse: test_parse.cc ../parse.hh ../grammar.hh ../util.hh
+ $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_parse.cc -o test_parse
+
clean:
rm -f test_grammar test_sparse_vector
diff --git a/fast/test/test_grammar b/fast/test/test_grammar
index 088d55a..6cf7ad5 100755
--- a/fast/test/test_grammar
+++ b/fast/test/test_grammar
Binary files differ
diff --git a/fast/util.hh b/fast/util.hh
index c3e087e..9ce19da 100644
--- a/fast/util.hh
+++ b/fast/util.hh
@@ -2,13 +2,16 @@
#include <string>
+#include "weaver.hh"
+
using namespace std;
namespace util {
inline string
-json_escape(const string& s) { // FIXME: only inline?
+json_escape(const string& s)
+{
ostringstream os;
for (auto it = s.cbegin(); it != s.cend(); it++) {
switch (*it) {
@@ -26,5 +29,19 @@ json_escape(const string& s) { // FIXME: only inline?
return os.str();
}
+inline vector<symbol_t>
+tokenize(string s)
+{
+ istringstream ss(s);
+ vector<symbol_t> r;
+ while (ss.good()) {
+ string buf;
+ ss >> buf;
+ r.push_back(buf);
+ }
+
+ return r;
+}
+
} // namespace util
diff --git a/fast/weaver.hh b/fast/weaver.hh
index 39d5391..e89b4dd 100644
--- a/fast/weaver.hh
+++ b/fast/weaver.hh
@@ -1,5 +1,10 @@
#pragma once
+#include <string>
+
+using namespace std;
+
+
typedef double score_t;
typedef string symbol_t;