diff options
-rw-r--r-- | fast/grammar.cc | 70 | ||||
-rw-r--r-- | fast/grammar.hh | 8 | ||||
-rw-r--r-- | fast/hypergraph.cc | 19 | ||||
-rw-r--r-- | fast/hypergraph.hh | 10 | ||||
-rw-r--r-- | fast/sparse_vector.hh | 3 | ||||
-rw-r--r-- | fast/test_grammar.cc | 4 |
6 files changed, 100 insertions, 14 deletions
diff --git a/fast/grammar.cc b/fast/grammar.cc index 07d4732..7f2d506 100644 --- a/fast/grammar.cc +++ b/fast/grammar.cc @@ -1,6 +1,25 @@ #include "grammar.hh" +string +esc_str(const string& s) { // FIXME + ostringstream os; + for (auto it = s.cbegin(); it != s.cend(); it++) { + switch (*it) { + case '"': os << "\\\""; break; + case '\\': os << "\\\\"; break; + case '\b': os << "\\b"; break; + case '\f': os << "\\f"; break; + case '\n': os << "\\n"; break; + case '\r': os << "\\r"; break; + case '\t': os << "\\t"; break; + default: os << *it; break; + } + } + + return os.str(); +} + namespace G { NT::NT(string& s) @@ -84,6 +103,19 @@ Item::repr() const os << t->repr(); else os << nt->repr(); + + return os.str(); +} + +string +Item::escaped() const +{ + ostringstream os; + if (type == TERMINAL) + os << t->escaped(); + else + os << nt->escaped(); + return os.str(); } @@ -98,6 +130,19 @@ NT::repr() const { ostringstream os; os << "NT<" << symbol << "," << index << ">"; + + return os.str(); +} + +string +NT::escaped() const +{ + ostringstream os; + os << "[" << symbol; + if (index > 0) + os << "," << index; + os << "]"; + return os.str(); } @@ -112,6 +157,7 @@ T::repr() const { ostringstream os; os << "T<" << word << ">"; + return os.str(); } @@ -141,6 +187,7 @@ Rule::repr() const ", arity=" << arity << \ ", map:" << "TODO" << \ ">"; + return os.str(); } @@ -150,11 +197,34 @@ operator<<(ostream& os, const Rule& r) return os << r.repr(); } +string +Rule::escaped() const +{ + ostringstream os; + os << lhs->escaped() << " ||| "; + for (auto it = rhs.begin(); it != rhs.end(); it++) { + os << (**it).escaped(); + if (next(it) != rhs.end()) os << " "; + } + os << " ||| "; + for (auto it = target.begin(); it != target.end(); it++) { + os << (**it).escaped(); + if (next(it) != target.end()) os << " "; + } + os << " ||| "; + os << "TODO"; + os << " ||| "; + os << "TODO"; + + return os.str(); +} + ostream& operator<<(ostream& os, const Grammar& g) { for (auto it = g.rules.begin(); it != g.rules.end(); it++) os << (**it).repr() << endl; + return os; } diff --git a/fast/grammar.hh b/fast/grammar.hh index 76b96a6..51501cf 100644 --- a/fast/grammar.hh +++ b/fast/grammar.hh @@ -12,6 +12,8 @@ using namespace std; +string esc_str(const string& s); // FIXME + namespace G { struct NT { @@ -21,6 +23,7 @@ struct NT { NT() {}; NT(string& s); string repr() const; + string escaped() const; friend ostream& operator<<(ostream& os, const NT& t); }; @@ -29,6 +32,7 @@ struct T { T(string& s); string repr() const; + string escaped() const { return esc_str(word); } friend ostream& operator<<(ostream& os, const NT& nt); }; @@ -44,6 +48,7 @@ struct Item { Item(string& s); string repr() const; + string escaped() const; friend ostream& operator<<(ostream& os, const Item& i); }; @@ -58,7 +63,10 @@ struct Rule { Rule() {}; Rule(string& s); string repr() const; + string escaped() const; friend ostream& operator<<(ostream& os, const Rule& r); + + MSGPACK_DEFINE(); }; struct Grammar { diff --git a/fast/hypergraph.cc b/fast/hypergraph.cc index 9101c92..6b7bd07 100644 --- a/fast/hypergraph.cc +++ b/fast/hypergraph.cc @@ -68,10 +68,10 @@ viterbi(Hypergraph& hg) namespace io { void -read(Hypergraph& hg, string fn) +read(Hypergraph& hg, vector<G::Rule*> rules, string fn) { ifstream ifs(fn); - size_t i = 0, nn, ne; + size_t i = 0, nr, nn, ne; msgpack::unpacker pac; while(true) { pac.reserve_buffer(32*1024); @@ -112,15 +112,18 @@ read(Hypergraph& hg, string fn) } void -write(Hypergraph& hg, string fn) +write(Hypergraph& hg, vector<G::Rule*> rules, string fn) { FILE* file = fopen(fn.c_str(), "wb"); msgpack::fbuffer fbuf(file); + msgpack::pack(fbuf, rules.size()); msgpack::pack(fbuf, hg.nodes.size()); msgpack::pack(fbuf, hg.edges.size()); - for (auto it = hg.nodes.begin(); it != hg.nodes.end(); it++) + for (auto it = rules.cbegin(); it != rules.cend(); it++) msgpack::pack(fbuf, **it); - for (auto it = hg.edges.begin(); it != hg.edges.end(); it++) + for (auto it = hg.nodes.cbegin(); it != hg.nodes.cend(); it++) + msgpack::pack(fbuf, **it); + for (auto it = hg.edges.cbegin(); it != hg.edges.cend(); it++) msgpack::pack(fbuf, **it); fclose(file); } @@ -216,7 +219,7 @@ manual(Hypergraph& hg) hg.nodes_by_id[6]->outgoing.push_back(z); } -} // namespace +} // namespace Hg::io ostream& operator<<(ostream& os, const Node& n) @@ -243,12 +246,12 @@ operator<<(ostream& os, const Edge& e) "Edge<head=" << e.head->id << \ ", tails=[" << _.str() << "]" \ ", score=" << e.score << \ - ", rule:'" << "TODO" << "'" << \ + ", rule:'" << e.rule->escaped() << "'" << \ ", f=" << "TODO" << \ ", arity=" << e.arity << \ ", mark=" << e.mark << ">"; return os; } -} // namespace +} // namespace Hg diff --git a/fast/hypergraph.hh b/fast/hypergraph.hh index 86b9069..79ee97b 100644 --- a/fast/hypergraph.hh +++ b/fast/hypergraph.hh @@ -16,6 +16,7 @@ #include "grammar.hh" #include "semiring.hh" #include "dummyvector.h" +#include "sparse_vector.hh" using namespace std; @@ -31,7 +32,7 @@ struct Edge { Node* head; vector<Node*> tails; score_t score; - string rule; // FIXME + G::Rule* rule; unsigned int arity = 0; unsigned int mark = 0; @@ -40,8 +41,9 @@ struct Edge { size_t head_id_; vector<size_t> tails_ids_; // node ids + size_t rule_id_; - MSGPACK_DEFINE(head_id_, tails_ids_, rule, score, arity); + MSGPACK_DEFINE(head_id_, tails_ids_, rule_id_, score, arity); }; struct Node { @@ -82,10 +84,10 @@ viterbi(Hypergraph& hg); namespace io { void -read(Hypergraph& hg, string fn); +read(Hypergraph& hg, vector<G::Rule*> rules, string fn); void -write(Hypergraph& hg, string fn); +write(Hypergraph& hg, vector<G::Rule*> rules, string fn); void manual(Hypergraph& hg); diff --git a/fast/sparse_vector.hh b/fast/sparse_vector.hh index 8fdc1b9..dd7f3cf 100644 --- a/fast/sparse_vector.hh +++ b/fast/sparse_vector.hh @@ -4,7 +4,8 @@ #include <vector> #include <sstream> -#include "hypergraph.hh" // FIXME +typedef double score_t; // FIXME +typedef double weight_t; using namespace std; diff --git a/fast/test_grammar.cc b/fast/test_grammar.cc index 9c5b74e..34a55ba 100644 --- a/fast/test_grammar.cc +++ b/fast/test_grammar.cc @@ -9,7 +9,9 @@ int main(int argc, char** argv) { G::Grammar g(argv[1]); - cout << g << endl; + for (auto it = g.rules.begin(); it != g.rules.end(); it++) + cout << (**it).escaped() << endl; + return 0; } |