From f1916c39b820b7d10d1ae7d7447675c4224d8197 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 24 Aug 2014 17:26:05 +0100 Subject: fixes --- fast/grammar.hh | 51 +++++++++++++++++++++++---------------------- fast/hypergraph.cc | 6 +++--- fast/parse.cc | 55 ------------------------------------------------- fast/parse.hh | 11 +++++++--- fast/test/Makefile | 5 ++++- fast/test/test_grammar | Bin 56832 -> 60943 bytes fast/util.hh | 19 ++++++++++++++++- fast/weaver.hh | 5 +++++ 8 files changed, 64 insertions(+), 88 deletions(-) delete mode 100644 fast/parse.cc diff --git a/fast/grammar.hh b/fast/grammar.hh index e5acb8a..4906c46 100644 --- a/fast/grammar.hh +++ b/fast/grammar.hh @@ -49,32 +49,29 @@ struct NT : public Item { index_ = 0; // default string t(s); t.erase(0, 1); t.pop_back(); // remove '[' and ']' - istringstream ss(s); + istringstream ss(t); if (ss >> index_) { // [i] symbol_ = ""; index_ = stoi(s); - return; - } else { // [X] - symbol_ = s; - - return; - } - string buf; - size_t j = 0; - while (ss.good() && getline(ss, buf, ',')) { - if (j == 0) { - symbol_ = buf; - } else { - index_ = stoi(buf); + } else { + ss.clear(); + string buf; + size_t j = 0; + while (ss.good() && getline(ss, buf, ',')) { + if (j == 0) { + symbol_ = buf; + } else { + index_ = stoi(buf); + } + j++; } - j++; } } virtual size_t index() const { return index_; } virtual symbol_t symbol() const { return symbol_; } - virtual item_type type() { return NON_TERMINAL; } + virtual item_type type() const { return NON_TERMINAL; } virtual ostream& repr(ostream& os) const @@ -103,7 +100,7 @@ struct T : public Item { } virtual symbol_t symbol() const { return symbol_; } - virtual item_type type() { return TERMINAL; } + virtual item_type type() const { return TERMINAL; } virtual ostream& repr(ostream& os) const @@ -169,7 +166,7 @@ Sv::SparseVector* f; { istringstream ss(s); string buf; - size_t j = 0, i = 0; + size_t j = 0, i = 1; r->arity = 0; vector rhs_non_terminals; r->f = new Sv::SparseVector(); @@ -187,7 +184,7 @@ Sv::SparseVector* f; } else if (j == 2) { // target Item* item = vocab.get(buf); if (item->type() == NON_TERMINAL) { - r->order[i] = item->index(); + r->order.insert(make_pair(i, item->index())); i++; if (item->symbol() == "") { // only [1], [2] ... on target reinterpret_cast(item)->symbol_ = \ @@ -223,9 +220,13 @@ Sv::SparseVector* f; } os << "}, f:"; f->repr(os); - os << ", arity=" << arity << \ - ", map:" << "TODO" << \ - ">"; + os << ", arity=" << arity << \ + ", order:{"; + for (auto it = order.begin(); it != order.end(); it++) { + os << it->first << "->" << it->second; + if (next(it) != order.end()) os << ", "; + } + os << "}>"; return os; } @@ -246,14 +247,14 @@ Sv::SparseVector* f; } os << " ||| "; f->escaped(os); - os << " ||| "; - os << "TODO"; + os << " ||| " << \ + "TODO"; return os; }; friend ostream& - operator<<(ostream& os, const Rule& r) + operator<<(ostream& os, Rule const& r) { return r.repr(os); }; diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc index d9a51a5..2b33ff4 100644 --- a/fast/hypergraph.cc +++ b/fast/hypergraph.cc @@ -69,12 +69,12 @@ viterbi(Hypergraph& hg) void viterbi_path(Hypergraph& hg, Path& p) { - //list::iterator root = \ + list::iterator root = \ find_if(hg.nodes.begin(), hg.nodes.end(), \ [](Node* n) { return n->incoming.size() == 0; }); - list::iterator root = hg.nodes.begin(); + //list::iterator root = hg.nodes.begin(); - //Hg::topological_sort(hg.nodes, root); + Hg::topological_sort(hg.nodes, root); // ^^^ FIXME do I need to do this when reading from file? Semiring::Viterbi semiring; Hg::init(hg.nodes, root, semiring); diff --git a/fast/parse.cc b/fast/parse.cc deleted file mode 100644 index 06c9fa0..0000000 --- a/fast/parse.cc +++ /dev/null @@ -1,55 +0,0 @@ -#include "parse.hh" - - -namespace Parse { - - -} // - - -vector tokenize(string s) -{ - istringstream ss(s); - vector res; - while (ss.good()) { - string t; - ss >> t; - G::T i(t); - cout << i.word << endl; - res.push_back(i); - } - return res; -} - - -bool operator==(vector const& a, vector const& b) -{ - if (a.size() != b.size()) return false; - for (auto it: a) -} - -int main(int argc, char** argv) -{ - string in("karten haie"); - vector tok = tokenize(in); - for (auto it: tok) - cout << it.word << ","; - cout << endl; - size_t n = tok.size(); - - G::Grammar g(argv[1]); - - vector spans; - Parse::visit(spans, 1, 0, 6); - for (auto it: spans) { - cout << "(" << it.first << "," << it.second << ")" << endl; - } - - Parse::Chart active(n); - Parse::Chart passive(n); - - //init(tok, n, active, passive, g); - - cout << *(g.flat.at(0)) << endl; -} - diff --git a/fast/parse.hh b/fast/parse.hh index 9fbcdea..33ea9ce 100644 --- a/fast/parse.hh +++ b/fast/parse.hh @@ -6,6 +6,8 @@ #include #include "grammar.hh" +#include "util.hh" +#include "weaver.hh" using namespace std; @@ -74,9 +76,10 @@ struct Chart string h(ChartItem* item, Span s) { ostringstream ss; - ss << item->rule->lhs->symbol; + item->rule->lhs->symbol(); ss << s.first; ss << s.second; + return ss.str(); } @@ -92,9 +95,11 @@ struct Chart }; -void init(vector const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g) +void +init(vector const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g) { - for (auto rule: g.flat) { + for (auto rule: g.rules) { + cout << *rule << endl; } } diff --git a/fast/test/Makefile b/fast/test/Makefile index 0140f63..65e97ef 100644 --- a/fast/test/Makefile +++ b/fast/test/Makefile @@ -3,7 +3,7 @@ CFLAGS=-std=c++11 -O3 -I../ TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread -all: test_grammar test_sparse_vector +all: test_grammar test_sparse_vector test_parse test_grammar: test_grammar.cc ../grammar.hh $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_grammar.cc -o test_grammar @@ -11,6 +11,9 @@ test_grammar: test_grammar.cc ../grammar.hh test_sparse_vector: test_sparse_vector.cc ../sparse_vector.hh $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_sparse_vector.cc -o test_sparse_vector +test_parse: test_parse.cc ../parse.hh ../grammar.hh ../util.hh + $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_parse.cc -o test_parse + clean: rm -f test_grammar test_sparse_vector diff --git a/fast/test/test_grammar b/fast/test/test_grammar index 088d55a..6cf7ad5 100755 Binary files a/fast/test/test_grammar and b/fast/test/test_grammar differ diff --git a/fast/util.hh b/fast/util.hh index c3e087e..9ce19da 100644 --- a/fast/util.hh +++ b/fast/util.hh @@ -2,13 +2,16 @@ #include +#include "weaver.hh" + using namespace std; namespace util { inline string -json_escape(const string& s) { // FIXME: only inline? +json_escape(const string& s) +{ ostringstream os; for (auto it = s.cbegin(); it != s.cend(); it++) { switch (*it) { @@ -26,5 +29,19 @@ json_escape(const string& s) { // FIXME: only inline? return os.str(); } +inline vector +tokenize(string s) +{ + istringstream ss(s); + vector r; + while (ss.good()) { + string buf; + ss >> buf; + r.push_back(buf); + } + + return r; +} + } // namespace util diff --git a/fast/weaver.hh b/fast/weaver.hh index 39d5391..e89b4dd 100644 --- a/fast/weaver.hh +++ b/fast/weaver.hh @@ -1,5 +1,10 @@ #pragma once +#include + +using namespace std; + + typedef double score_t; typedef string symbol_t; -- cgit v1.2.3