diff options
author | Patrick Simianer <p@simianer.de> | 2014-08-23 22:59:16 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-08-23 22:59:16 +0100 |
commit | cef65063cec641a93973b38a48e100fdd115db44 (patch) | |
tree | 32d5f10757e021a9fad01156fbff62a96212f006 | |
parent | 190f68c880eb27506669e95e2bc0493e2ec42c4c (diff) |
rewritten grammar
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | fast/Makefile | 23 | ||||
-rw-r--r-- | fast/README.md | 3 | ||||
-rw-r--r-- | fast/grammar.cc | 273 | ||||
-rw-r--r-- | fast/grammar.hh | 303 | ||||
-rw-r--r-- | fast/hypergraph.cc | 23 | ||||
-rw-r--r-- | fast/hypergraph.hh | 4 | ||||
-rw-r--r-- | fast/main.cc | 3 | ||||
-rw-r--r-- | fast/parse.cc | 55 | ||||
-rw-r--r-- | fast/parse.hh | 103 | ||||
-rw-r--r-- | fast/semiring.hh | 1 | ||||
-rw-r--r-- | fast/sparse_vector.hh | 21 | ||||
-rw-r--r-- | fast/test/Makefile | 16 | ||||
-rwxr-xr-x | fast/test/test_grammar | bin | 0 -> 56832 bytes | |||
-rw-r--r-- | fast/test/test_grammar.cc | 20 | ||||
-rwxr-xr-x | fast/test/test_sparse_vector | bin | 0 -> 44288 bytes | |||
-rw-r--r-- | fast/test/test_sparse_vector.cc (renamed from fast/test_sparse_vector.cc) | 0 | ||||
-rw-r--r-- | fast/test_grammar.cc | 17 | ||||
-rw-r--r-- | fast/util.hh | 35 | ||||
-rw-r--r-- | fast/weaver.hh | 1 |
20 files changed, 511 insertions, 391 deletions
@@ -5,3 +5,4 @@ fast/test_grammar fast/test_sparse_vector util/make_pak util/read_pak +fast/gperftools-2.1/ diff --git a/fast/Makefile b/fast/Makefile index 9e88076..1a7f5b9 100644 --- a/fast/Makefile +++ b/fast/Makefile @@ -1,24 +1,15 @@ COMPILER=g++ CFLAGS=-std=c++11 -O3 +TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread -all: grammar.o hypergraph.o main.cc - $(COMPILER) $(CFLAGS) -std=c++11 -lstdc++ -lm -lmsgpack grammar.o hypergraph.o main.cc -o fast_weaver +all: hypergraph.o main.cc + $(COMPILER) $(CFLAGS) -lstdc++ -lm -lmsgpack $(TCMALLOC) hypergraph.o main.cc -o fast_weaver -hypergraph.o: hypergraph.cc hypergraph.hh grammar.o semiring.hh sparse_vector.hh weaver.hh - $(COMPILER) $(CFLAGS) -g -c hypergraph.cc - -grammar.o: grammar.cc grammar.hh sparse_vector.hh util.hh - $(COMPILER) $(CFLAGS) -g -c grammar.cc - -test: test_grammar test_sparse_vector - -test_grammar: test_grammar.cc grammar.o - $(COMPILER) $(CFLAGS) -lstdc++ -lm grammar.o test_grammar.cc -o test_grammar - -test_sparse_vector: test_sparse_vector.cc sparse_vector.hh - $(COMPILER) $(CFLAGS) -lstdc++ -lm test_sparse_vector.cc -o test_sparse_vector +hypergraph.o: hypergraph.cc hypergraph.hh semiring.hh sparse_vector.hh weaver.hh + $(COMPILER) $(CFLAGS) -g -c $(TCMALLOC) hypergraph.cc clean: - rm -f fast_weaver hypergraph.o grammar.o test_grammar test_sparse_vector + rm -f fast_weaver + rm -f hypergraph.o parse.o diff --git a/fast/README.md b/fast/README.md index 1d6bd04..f92245b 100644 --- a/fast/README.md +++ b/fast/README.md @@ -2,7 +2,6 @@ TODO * sparse vector (unordered_map) -> where to store? * parser * Rule -> ChartItem -> Node ? - * viterbi path/string * k-best * other semirings * include language model @@ -34,4 +33,6 @@ http://www.osl.iu.edu/research/mtl/download.php3 http://scicomp.stackexchange.com/questions/351/recommendations-for-a-usable-fast-c-matrix-library https://software.intel.com/en-us/tbb_4.2_doc http://goog-perftools.sourceforge.net/doc/tcmalloc.html +http://www.sgi.com/tech/stl/Rope.html +http://www.cs.unc.edu/Research/compgeom/gzstream/ diff --git a/fast/grammar.cc b/fast/grammar.cc deleted file mode 100644 index a003eb4..0000000 --- a/fast/grammar.cc +++ /dev/null @@ -1,273 +0,0 @@ -#include "grammar.hh" - - -namespace G { - -/* - * G::NT - * - */ -NT::NT(string& s) -{ - s.erase(0, 1); s.pop_back(); // remove '[' and ']' - istringstream ss(s); - if (ss >> index) { // [i] - symbol = ""; - index = stoi(s); - - return; - } else { // [X] - symbol = s; - index = 0; - - return; - } - string buf; - size_t j = 0; - index = 0; // default - while (ss.good() && getline(ss, buf, ',')) { - if (j == 0) { - symbol = buf; - } else { - index = stoi(buf); - } - j++; - } -} - -string -NT::repr() const -{ - ostringstream os; - os << "NT<" << symbol << "," << index << ">"; - - return os.str(); -} - -string -NT::escaped() const -{ - ostringstream os; - os << "[" << symbol; - if (index > 0) - os << "," << index; - os << "]"; - - return os.str(); -} - -ostream& -operator<<(ostream& os, const NT& nt) -{ - return os << nt.repr(); -} - -/* - * G::T - * - */ -T::T(const string& s) -{ - word = s; -} - -string -T::repr() const -{ - ostringstream os; - os << "T<" << word << ">"; - - return os.str(); -} - -string -T::escaped() const -{ - return util::json_escape(word); -} - -ostream& -operator<<(ostream& os, const T& t) -{ - return os << t.repr(); -} - - -/* - * G::Item - * - * Better solve this by inheritance - * -> rhs, target as vector<base class> ? - * - */ -Item::Item(string& s) -{ - if (s.front() == '[' && s.back() == ']') { - type = NON_TERMINAL; - nt = new NT(s); - } else { - type = TERMINAL; - t = new T(s); - } -} - -string -Item::repr() const -{ - ostringstream os; - if (type == TERMINAL) - os << t->repr(); - else - os << nt->repr(); - - return os.str(); -} - -string -Item::escaped() const -{ - ostringstream os; - if (type == TERMINAL) - os << t->escaped(); - else - os << nt->escaped(); - - return os.str(); -} - -ostream& -operator<<(ostream& os, const Item& i) -{ - return os << i.repr(); -} - -/* - * G::Rule - * - */ -Rule::Rule(const string& s) -{ - from_s(this, s); -} - -void -Rule::from_s(Rule* r, const string& s) -{ - stringstream ss(s); - size_t j = 0; - string buf; - r->arity = 0; - size_t index = 1; - vector<G::NT*> rhs_nt; - r->f = new Sv::SparseVector<string, score_t>(); - while (ss >> buf) { - if (buf == "|||") { j++; continue; } - if (j == 0) { // LHS - r->lhs = new NT(buf); - } else if (j == 1) { // RHS - r->rhs.push_back(new Item(buf)); - if (r->rhs.back()->type == NON_TERMINAL) { - rhs_nt.push_back(r->rhs.back()->nt); - r->arity++; - } - } else if (j == 2) { // TARGET - r->target.push_back(new Item(buf)); - if (r->target.back()->type == NON_TERMINAL) { - r->order.insert(make_pair(index, r->target.back()->nt->index)); - if (r->target.back()->nt->symbol == "") - r->target.back()->nt->symbol = rhs_nt[r->target.back()->nt->index-1]->symbol; - index++; - } - } else if (j == 3) { // F TODO - Sv::SparseVector<string, score_t>::from_s(r->f, buf); // FIXME this is slow!!! - } else if (j == 4) { // A TODO - } else { - // ERROR - } - if (j == 4) break; - } -} - -string -Rule::repr() const -{ - ostringstream os; - os << "Rule<lhs=" << lhs->repr() << \ - ", rhs:{"; - for (auto it = rhs.begin(); it != rhs.end(); it++) { - os << (**it).repr(); - if (next(it) != rhs.end()) os << " "; - } - os << "}, target:{"; - for (auto it = target.begin(); it != target.end(); it++) { - os << (**it).repr(); - if (next(it) != target.end()) os << " "; - } - os << "}" \ - ", f:" << f->repr() << \ - ", arity=" << arity << \ - ", map:" << "TODO" << \ - ">"; - - return os.str(); -} - -string -Rule::escaped() const -{ - ostringstream os; - os << lhs->escaped() << " ||| "; - for (auto it = rhs.begin(); it != rhs.end(); it++) { - os << (**it).escaped(); - if (next(it) != rhs.end()) os << " "; - } - os << " ||| "; - for (auto it = target.begin(); it != target.end(); it++) { - os << (**it).escaped(); - if (next(it) != target.end()) os << " "; - } - os << " ||| "; - os << f->escaped(); - os << " ||| "; - os << "TODO(alignment)"; - - return os.str(); -} - -ostream& -operator<<(ostream& os, const Rule& r) -{ - return os << r.repr(); -} - -/* - * G::Grammmar - * - */ -Grammar::Grammar(const string& fn) -{ - ifstream ifs(fn); - string line; - while (getline(ifs, line)) { - G::Rule* r = new G::Rule(line); - rules.push_back(r); - if (r->arity == 0) - flat.push_back(r); - else if (r->rhs.front()->type == NON_TERMINAL) - start_nt.push_back(r); - else - start_t.push_back(r); - } -} - -ostream& -operator<<(ostream& os, const Grammar& g) -{ - for (const auto it: g.rules) - os << it->repr() << endl; - - return os; -} - -} // namespace G - diff --git a/fast/grammar.hh b/fast/grammar.hh index 1b9ac5a..e5acb8a 100644 --- a/fast/grammar.hh +++ b/fast/grammar.hh @@ -1,13 +1,14 @@ #pragma once -#include <fstream> #include <iostream> +#include <fstream> +#include <map> #include <sstream> #include <string> -#include <map> -#include <msgpack.hpp> #include <vector> +#include <msgpack.hpp> + #include "sparse_vector.hh" #include "util.hh" @@ -16,46 +17,138 @@ using namespace std; namespace G { -struct NT { - string symbol; - size_t index; +enum item_type { + UNDEFINED, + TERMINAL, + NON_TERMINAL +}; - NT() {}; - NT(string& s); +struct Item { + virtual size_t index() const { return 0; } + virtual symbol_t symbol() const { return ""; } + virtual item_type type() const { return UNDEFINED; } - string repr() const; - string escaped() const; + virtual ostream& repr(ostream& os) const { return os << "Item<>"; } + virtual ostream& escaped(ostream& os) const { return os << ""; } - friend ostream& operator<<(ostream& os, const NT& t); + friend ostream& + operator<<(ostream& os, const Item& i) + { + return i.repr(os); + }; }; -struct T { - string word; // use word ids instead? +struct NT : public Item { + symbol_t symbol_; + size_t index_; + + NT() {} + + NT(string const& s) + { + index_ = 0; // default + string t(s); + t.erase(0, 1); t.pop_back(); // remove '[' and ']' + istringstream ss(s); + if (ss >> index_) { // [i] + symbol_ = ""; + index_ = stoi(s); + + return; + } else { // [X] + symbol_ = s; + + return; + } + string buf; + size_t j = 0; + while (ss.good() && getline(ss, buf, ',')) { + if (j == 0) { + symbol_ = buf; + } else { + index_ = stoi(buf); + } + j++; + } + } - T(const string& s); + virtual size_t index() const { return index_; } + virtual symbol_t symbol() const { return symbol_; } + virtual item_type type() { return NON_TERMINAL; } - string repr() const; - string escaped() const; + virtual ostream& + repr(ostream& os) const + { + return os << "NT<" << symbol_ << "," << index_ << ">"; + } - friend ostream& operator<<(ostream& os, const NT& nt); + virtual ostream& + escaped(ostream& os) const + { + os << "[" << symbol_; + if (index_ > 0) + os << "," << index_; + os << "]"; + + return os; + } }; -enum item_type { - NON_TERMINAL, - TERMINAL +struct T : public Item { + symbol_t symbol_; + + T(string const& s) + { + symbol_ = s; + } + + virtual symbol_t symbol() const { return symbol_; } + virtual item_type type() { return TERMINAL; } + + virtual ostream& + repr(ostream& os) const + { + return os << "T<" << symbol_ << ">"; + } + + virtual ostream& + escaped(ostream& os) const + { + os << util::json_escape(symbol_); + } }; -struct Item { - item_type type; - NT* nt; - T* t; +struct Vocabulary +{ + unordered_map<symbol_t, size_t> m_; + vector<Item*> items_; - Item(string& s); + bool + is_non_terminal(string const& s) + { + return s.front() == '[' && s.back() == ']'; + } - string repr() const; - string escaped() const; + Item* + get(symbol_t const& s) + { + if (is_non_terminal(s)) + return new NT(s); + if (m_.find(s) != m_.end()) + return items_[m_[s]]; + return add(s); + } - friend ostream& operator<<(ostream& os, const Item& i); + Item* + add(symbol_t const& s) + { + size_t next_index_ = items_.size(); + T* item = new T(s); + items_.push_back(item); + m_[s] = next_index_; + + return item; + } }; struct Rule { @@ -65,35 +158,157 @@ struct Rule { size_t arity; Sv::SparseVector<string, score_t>* f; map<size_t, size_t> order; - string as_str_; // FIXME + string as_str_; + + Rule() {} + + Rule(string const& s, Vocabulary& vocab) { from_s(this, s, vocab); } + + static void + from_s(Rule* r, string const& s, Vocabulary& vocab) + { + istringstream ss(s); + string buf; + size_t j = 0, i = 0; + r->arity = 0; + vector<NT*> rhs_non_terminals; + r->f = new Sv::SparseVector<string, score_t>(); + while (ss >> buf) { + if (buf == "|||") { j++; continue; } + if (j == 0) { // left-hand side + r->lhs = new NT(buf); + } else if (j == 1) { // right-hand side + Item* item = vocab.get(buf); + r->rhs.push_back(item); + if (item->type() == NON_TERMINAL) { + r->arity++; + rhs_non_terminals.push_back(reinterpret_cast<NT*>(item)); + } + } else if (j == 2) { // target + Item* item = vocab.get(buf); + if (item->type() == NON_TERMINAL) { + r->order[i] = item->index(); + i++; + if (item->symbol() == "") { // only [1], [2] ... on target + reinterpret_cast<NT*>(item)->symbol_ = \ + rhs_non_terminals[item->index()-1]->symbol(); + } + } + r->target.push_back(item); + } else if (j == 3) { // feature vector + Sv::SparseVector<string, score_t>::from_s(r->f, buf); + // FIXME: this is slow!!! ^^^ + } else if (j == 4) { // alignment + } else { + // error + } + if (j == 4) break; + } + } - Rule() {}; - Rule(const string& s); - static void from_s(Rule* r, const string& s); + ostream& + repr(ostream& os) const + { + os << "Rule<lhs="; + lhs->repr(os); + os << ", rhs:{"; + for (auto it = rhs.begin(); it != rhs.end(); it++) { + (**it).repr(os); + if (next(it) != rhs.end()) os << " "; + } + os << "}, target:{"; + for (auto it = target.begin(); it != target.end(); it++) { + (**it).repr(os); + if (next(it) != target.end()) os << " "; + } + os << "}, f:"; + f->repr(os); + os << ", arity=" << arity << \ + ", map:" << "TODO" << \ + ">"; - string repr() const; - string escaped() const; + return os; + } - friend ostream& operator<<(ostream& os, const Rule& r); + ostream& + escaped(ostream& os) const + { + lhs->escaped(os); + os << " ||| "; + for (auto it = rhs.begin(); it != rhs.end(); it++) { + (**it).escaped(os); + if (next(it) != rhs.end()) os << " "; + } + os << " ||| "; + for (auto it = target.begin(); it != target.end(); it++) { + (**it).escaped(os); + if (next(it) != target.end()) os << " "; + } + os << " ||| "; + f->escaped(os); + os << " ||| "; + os << "TODO"; - void prep_for_serialization_() { as_str_ = escaped(); }; // FIXME + return os; + }; - MSGPACK_DEFINE(as_str_); // TODO + friend ostream& + operator<<(ostream& os, const Rule& r) + { + return r.repr(os); + }; + + // -- + void + prep_for_serialization_() + { + ostringstream os; + escaped(os); + as_str_ = os.str(); + }; + MSGPACK_DEFINE(as_str_); + // ^^^ FIXME }; struct Grammar { vector<Rule*> rules; vector<Rule*> flat; - vector<Rule*> start_nt; - vector<Rule*> start_t; + vector<Rule*> start_non_terminal; + vector<Rule*> start_terminal; + + Grammar() {} + + Grammar(string const& fn, Vocabulary& vocab) + { + ifstream ifs(fn); + string line; + while (getline(ifs, line)) { + G::Rule* r = new G::Rule(line, vocab); + rules.push_back(r); + if (r->arity == 0) + flat.push_back(r); + else if (r->rhs.front()->type() == NON_TERMINAL) + start_non_terminal.push_back(r); + else + start_terminal.push_back(r); + } + } - Grammar() {}; - Grammar(const string& fn); + void add_glue(); + // ^^^ TODO + void add_pass_through(const string& input); + // ^^^ TODO - void add_glue(); // TODO - void add_pass_through(const string& input); // TODO + friend ostream& + operator<<(ostream& os, Grammar const& g) + { + for (const auto it: g.rules) { + it->repr(os); + os << endl; + } - friend ostream& operator<<(ostream& os, const Grammar& g); + return os; + } }; } // namespace G diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc index a9a44f9..d9a51a5 100644 --- a/fast/hypergraph.cc +++ b/fast/hypergraph.cc @@ -69,11 +69,13 @@ viterbi(Hypergraph& hg) void viterbi_path(Hypergraph& hg, Path& p) { - list<Node*>::iterator root = \ + //list<Node*>::iterator root = \ find_if(hg.nodes.begin(), hg.nodes.end(), \ [](Node* n) { return n->incoming.size() == 0; }); + list<Node*>::iterator root = hg.nodes.begin(); - Hg::topological_sort(hg.nodes, root); // FIXME do I need to do this when reading from file? + //Hg::topological_sort(hg.nodes, root); + // ^^^ FIXME do I need to do this when reading from file? Semiring::Viterbi<score_t> semiring; Hg::init(hg.nodes, root, semiring); @@ -111,11 +113,11 @@ derive(const Path& p, const Node* cur, vector<string>& carry) unsigned j = 0; for (auto it: next->rule->target) { - if (it->type == G::NON_TERMINAL) { + if (it->type() == G::NON_TERMINAL) { derive(p, next->tails[next->rule->order[j]], carry); j++; } else { - carry.push_back(it->t->word); + carry.push_back(it->symbol()); } } } @@ -123,7 +125,7 @@ derive(const Path& p, const Node* cur, vector<string>& carry) namespace io { void -read(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn) // FIXME +read(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab, const string& fn) { ifstream ifs(fn); size_t i = 0, r, n, e; @@ -145,7 +147,7 @@ read(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn) // FIXME string s; o.convert(&s); G::Rule* rule = new G::Rule; - G::Rule::from_s(rule, s); + G::Rule::from_s(rule, s, vocab); rules.push_back(rule); } else if (i > r+2 && i <= r+n+2) { Node* n = new Node; @@ -193,7 +195,7 @@ write(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn) // FIXME } void -manual(Hypergraph& hg, vector<G::Rule*>& rules) +manual(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab) { // nodes Node* a = new Node; a->id = 0; a->symbol = "root"; a->left = -1; a->right = -1; a->mark = 0; @@ -228,7 +230,7 @@ manual(Hypergraph& hg, vector<G::Rule*>& rules) rule_strs.push_back("[S] ||| [NP,1] [VP,2] ||| [NP,1] [VP,2] ||| ||| "); for (auto it: rule_strs) { - rules.push_back(new G::Rule(it)); + rules.push_back(new G::Rule(it, vocab)); rules.back()->f = new Sv::SparseVector<string, score_t>(); } @@ -349,8 +351,9 @@ operator<<(ostream& os, const Edge& e) "Edge<head=" << e.head->id << \ ", tails=[" << _.str() << "]" \ ", score=" << e.score << \ - ", rule:'" << e.rule->escaped() << "'" << \ - ", f=" << "TODO" << \ + ", rule:'"; + e.rule->escaped(os); + os << "', f=" << "TODO" << \ ", arity=" << e.arity << \ ", mark=" << e.mark << ">"; return os; diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh index 299a62d..1c48a88 100644 --- a/fast/hypergraph.hh +++ b/fast/hypergraph.hh @@ -34,7 +34,7 @@ struct Edge { unsigned int mark = 0; inline bool is_marked() { return mark >= arity; } - friend ostream& operator<<(ostream& os, const Edge& s); + friend ostream& operator<<(ostream& os, const Edge& e); size_t head_id_; vector<size_t> tails_ids_; // node ids @@ -89,7 +89,7 @@ derive(const Path& p, const Node* cur, vector<string>& carry); namespace io { void -read(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn); // FIXME +read(Hypergraph& hg, vector<G::Rule*>& rules, G::Vocabulary& vocab, const string& fn); // FIXME void write(Hypergraph& hg, vector<G::Rule*>& rules, const string& fn); // FIXME diff --git a/fast/main.cc b/fast/main.cc index 08fcfcf..825ad59 100644 --- a/fast/main.cc +++ b/fast/main.cc @@ -6,8 +6,9 @@ int main(int argc, char** argv) { Hg::Hypergraph hg; + G::Vocabulary y; G::Grammar g; - Hg::io::read(hg, g.rules, argv[1]); + Hg::io::read(hg, g.rules, y, argv[1]); //Hg::io::manual(hg, g.rules); clock_t begin = clock(); Hg::Path p; diff --git a/fast/parse.cc b/fast/parse.cc new file mode 100644 index 0000000..06c9fa0 --- /dev/null +++ b/fast/parse.cc @@ -0,0 +1,55 @@ +#include "parse.hh" + + +namespace Parse { + + +} // + + +vector<G::T> tokenize(string s) +{ + istringstream ss(s); + vector<G::T> res; + while (ss.good()) { + string t; + ss >> t; + G::T i(t); + cout << i.word << endl; + res.push_back(i); + } + return res; +} + + +bool operator==(vector<G::Item> const& a, vector<G::Item> const& b) +{ + if (a.size() != b.size()) return false; + for (auto it: a) +} + +int main(int argc, char** argv) +{ + string in("karten haie"); + vector<G::T> tok = tokenize(in); + for (auto it: tok) + cout << it.word << ","; + cout << endl; + size_t n = tok.size(); + + G::Grammar g(argv[1]); + + vector<Span> spans; + Parse::visit(spans, 1, 0, 6); + for (auto it: spans) { + cout << "(" << it.first << "," << it.second << ")" << endl; + } + + Parse::Chart active(n); + Parse::Chart passive(n); + + //init(tok, n, active, passive, g); + + cout << *(g.flat.at(0)) << endl; +} + diff --git a/fast/parse.hh b/fast/parse.hh new file mode 100644 index 0000000..9fbcdea --- /dev/null +++ b/fast/parse.hh @@ -0,0 +1,103 @@ +#pragma once + +#include <vector> +#include <utility> +#include <sstream> +#include <unordered_map> + +#include "grammar.hh" + + +using namespace std; + +typedef pair<size_t,size_t> Span; +namespace std { + template <> + struct hash<Span> + { + size_t + operator()(Span const& k) const + { + return ((hash<size_t>()(k.first) + ^ (hash<size_t>()(k.second) << 1)) >> 1); + } + }; +} + +namespace Parse { + +void visit(vector<Span>& p, + size_t i, size_t l, size_t r, size_t x=0) +{ + for (size_t span = i; span <= r-x; span++) { + for (size_t k = l; k <= r-span; k++) { + p.push_back(Span(k,k+span)); + } + } +} + +struct ChartItem +{ + Span span; + G::Rule const* rule; + vector<ChartItem*> tails; + size_t dot; + + ChartItem(G::Rule* r) + { + rule = r; + dot = 0; + } + + ChartItem(ChartItem const& o) + { + span.first = o.span.first; + span.second = o.span.second; + rule = o.rule; + for (auto it: o.tails) + tails.push_back(it); + dot = o.dot; + } +}; + +struct Chart +{ + size_t n_; + unordered_map<Span, vector<ChartItem*> > m_; + unordered_map<string,bool> b_; + + vector<ChartItem*>& at(Span s) + { + return m_[s]; + } + + string h(ChartItem* item, Span s) + { + ostringstream ss; + ss << item->rule->lhs->symbol; + ss << s.first; + ss << s.second; + return ss.str(); + } + + void add(ChartItem* item, Span s) + { + m_[s].push_back(item); + b_[h(item, s)] = true; + } + + Chart(size_t n) + { + } +}; + + +void init(vector<G::T> const& in, size_t n, Chart& active, Chart& passive, G::Grammar const& g) +{ + for (auto rule: g.flat) { + } +} + + +} // + diff --git a/fast/semiring.hh b/fast/semiring.hh index 1c3ff1d..3f4ac08 100644 --- a/fast/semiring.hh +++ b/fast/semiring.hh @@ -1,7 +1,6 @@ #pragma once -// TODO: others namespace Semiring { template<typename T> diff --git a/fast/sparse_vector.hh b/fast/sparse_vector.hh index 3583240..9d815ff 100644 --- a/fast/sparse_vector.hh +++ b/fast/sparse_vector.hh @@ -20,6 +20,7 @@ struct SparseVector { V zero = 0.f; SparseVector() {}; + SparseVector(string& s) { from_s(this, s); @@ -147,10 +148,9 @@ struct SparseVector { } } - string - repr() const + ostream& + repr(ostream& os) const { - ostringstream os; os << "SparseVector<{"; for (auto it = m_.cbegin(); it != m_.cend(); it++) { os << "'" << it->first << "'=" << it->second; @@ -159,12 +159,11 @@ struct SparseVector { } os << "}>"; - return os.str(); + return os; }; - string - escaped(bool quote_keys=false) const { - ostringstream os; + ostream& + escaped(ostream& os, bool quote_keys=false) const { for (auto it = m_.cbegin(); it != m_.cend(); it++) { if (quote_keys) os << '"'; os << util::json_escape(it->first); @@ -173,10 +172,14 @@ struct SparseVector { if (next(it) != m_.cend()) os << " "; } - return os.str(); + return os; }; - friend ostream& operator<<(ostream& os, const SparseVector& v) { return os << v.repr(); } + friend ostream& + operator<<(ostream& os, const SparseVector& v) + { + return v.repr(os); + } }; } // namespace diff --git a/fast/test/Makefile b/fast/test/Makefile new file mode 100644 index 0000000..0140f63 --- /dev/null +++ b/fast/test/Makefile @@ -0,0 +1,16 @@ +COMPILER=g++ +CFLAGS=-std=c++11 -O3 -I../ +TCMALLOC=/home/pks/src/weaver/fast/gperftools-2.1/lib/libtcmalloc_minimal.a -pthread + + +all: test_grammar test_sparse_vector + +test_grammar: test_grammar.cc ../grammar.hh + $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_grammar.cc -o test_grammar + +test_sparse_vector: test_sparse_vector.cc ../sparse_vector.hh + $(COMPILER) $(CFLAGS) -lstdc++ -lm $(TCMALLOC) test_sparse_vector.cc -o test_sparse_vector + +clean: + rm -f test_grammar test_sparse_vector + diff --git a/fast/test/test_grammar b/fast/test/test_grammar Binary files differnew file mode 100755 index 0000000..088d55a --- /dev/null +++ b/fast/test/test_grammar diff --git a/fast/test/test_grammar.cc b/fast/test/test_grammar.cc new file mode 100644 index 0000000..bbe76e7 --- /dev/null +++ b/fast/test/test_grammar.cc @@ -0,0 +1,20 @@ +#include <fstream> + +#include "grammar.hh" + +using namespace std; + + +int +main(int argc, char** argv) +{ + G::Vocabulary y; + G::Grammar g(argv[1], y); + for (auto it: g.rules) { + it->escaped(cout); + cout << endl; + } + + return 0; +} + diff --git a/fast/test/test_sparse_vector b/fast/test/test_sparse_vector Binary files differnew file mode 100755 index 0000000..c06fe9e --- /dev/null +++ b/fast/test/test_sparse_vector diff --git a/fast/test_sparse_vector.cc b/fast/test/test_sparse_vector.cc index 426bed1..426bed1 100644 --- a/fast/test_sparse_vector.cc +++ b/fast/test/test_sparse_vector.cc diff --git a/fast/test_grammar.cc b/fast/test_grammar.cc deleted file mode 100644 index 3263edd..0000000 --- a/fast/test_grammar.cc +++ /dev/null @@ -1,17 +0,0 @@ -#include <fstream> - -#include "grammar.hh" - -using namespace std; - - -int -main(int argc, char** argv) -{ - G::Grammar g(argv[1]); - for (auto it: g.rules) - cout << it->escaped() << endl; - - return 0; -} - diff --git a/fast/util.hh b/fast/util.hh index 2a28f16..c3e087e 100644 --- a/fast/util.hh +++ b/fast/util.hh @@ -7,23 +7,24 @@ using namespace std; namespace util { - inline string - json_escape(const string& s) { // FIXME: only inline? - ostringstream os; - for (auto it = s.cbegin(); it != s.cend(); it++) { - switch (*it) { - case '"': os << "\\\""; break; - case '\\': os << "\\\\"; break; - case '\b': os << "\\b"; break; - case '\f': os << "\\f"; break; - case '\n': os << "\\n"; break; - case '\r': os << "\\r"; break; - case '\t': os << "\\t"; break; - default: os << *it; break; - } - } - return os.str(); - }; +inline string +json_escape(const string& s) { // FIXME: only inline? + ostringstream os; + for (auto it = s.cbegin(); it != s.cend(); it++) { + switch (*it) { + case '"': os << "\\\""; break; + case '\\': os << "\\\\"; break; + case '\b': os << "\\b"; break; + case '\f': os << "\\f"; break; + case '\n': os << "\\n"; break; + case '\r': os << "\\r"; break; + case '\t': os << "\\t"; break; + default: os << *it; break; + } + } + + return os.str(); +} } // namespace util diff --git a/fast/weaver.hh b/fast/weaver.hh index e7c3238..39d5391 100644 --- a/fast/weaver.hh +++ b/fast/weaver.hh @@ -1,4 +1,5 @@ #pragma once typedef double score_t; +typedef string symbol_t; |