#include "grammar.hh"
namespace G {
/*
* G::NT
*
*/
/*
 * Builds a non-terminal from its bracketed textual form.
 *
 * Recognised forms (s includes the surrounding brackets):
 *   [i]    text starting with an integer -> symbol = "",  index = i
 *   [X]    plain symbol                  -> symbol = "X", index = 0
 *   [X,i]  symbol and index              -> symbol = "X", index = i
 *
 * If the text after the first ',' is not an integer, the whole bracket
 * content is kept as the symbol and index is 0.
 *
 * s is modified in place: its first and last characters are removed.
 */
NT::NT(string& s)
{
  // remove '[' and ']'; guarded so that a string shorter than two
  // characters never reaches pop_back() on an empty string
  if (!s.empty()) s.erase(0, 1);
  if (!s.empty()) s.pop_back();

  istringstream ss(s);
  if (ss >> index) { // [i]
    symbol = "";
    index = stoi(s);
    return;
  }

  // [X] or [X,i]
  symbol = s;
  index = 0;
  const size_t comma = s.find(',');
  if (comma != string::npos) {
    istringstream tail(s.substr(comma + 1));
    if (tail >> index) {
      symbol = s.substr(0, comma);
    } else {
      index = 0; // no usable index: keep the whole text as the symbol
    }
  }
}
/* Debug representation of the form NT<symbol,index>. */
string
NT::repr() const
{
  ostringstream out;
  out << "NT<";
  out << symbol;
  out << ",";
  out << index;
  out << ">";
  return out.str();
}
/*
 * Bracketed textual form: "[symbol]" when index is not positive,
 * "[symbol,index]" otherwise.
 */
string
NT::escaped() const
{
  ostringstream out;
  out << "[";
  out << symbol;
  if (index > 0) {
    out << ",";
    out << index;
  }
  out << "]";
  return out.str();
}
/* Streams the repr() form of a non-terminal. */
ostream&
operator<<(ostream& os, const NT& nt)
{
  os << nt.repr();
  return os;
}
/*
* G::T
*
*/
/* Builds a terminal whose word is a copy of s. */
T::T(const string& s)
  : word(s)
{
}
/* Debug representation of the form T<word>. */
string
T::repr() const
{
  ostringstream out;
  out << "T<";
  out << word;
  out << ">";
  return out.str();
}
/* Returns the word as processed by util::json_escape. */
string
T::escaped() const
{
  string result = util::json_escape(word);
  return result;
}
/* Streams the repr() form of a terminal. */
ostream&
operator<<(ostream& os, const T& t)
{
  os << t.repr();
  return os;
}
/*
* G::Item
*
* Better solve this by inheritance
* -> rhs, target as vector ?
*
*/
/*
 * Builds an item from one token.
 *
 * A token that starts with '[' and ends with ']' becomes a non-terminal
 * (type = NON_TERMINAL, nt set); anything else, including the empty
 * string, becomes a terminal (type = TERMINAL, t set).
 *
 * For a non-terminal, s is handed to NT's constructor by reference and
 * is modified there (the brackets are stripped).
 */
Item::Item(string& s)
{
  // front()/back() must not be called on an empty string
  const bool bracketed = !s.empty() && s.front() == '[' && s.back() == ']';
  if (bracketed) {
    type = NON_TERMINAL;
    nt = new NT(s);
  } else {
    type = TERMINAL;
    t = new T(s);
  }
}
/* Debug representation: delegates to the wrapped terminal or non-terminal. */
string
Item::repr() const
{
  if (type == TERMINAL)
    return t->repr();
  return nt->repr();
}
/* Escaped textual form: delegates to the wrapped terminal or non-terminal. */
string
Item::escaped() const
{
  if (type == TERMINAL)
    return t->escaped();
  return nt->escaped();
}
/* Streams the repr() form of an item. */
ostream&
operator<<(ostream& os, const Item& i)
{
  os << i.repr();
  return os;
}
/*
* G::Rule
*
*/
/* Builds a rule by parsing the textual rule s via Rule::from_s. */
Rule::Rule(const string& s)
{
  Rule::from_s(this, s);
}
/*
 * Parses one textual rule into *r.
 *
 * s is split into whitespace-separated tokens; the token "|||" separates
 * the fields:
 *   field 0: LHS non-terminal
 *   field 1: RHS items (each non-terminal increments r->arity)
 *   field 2: target items
 *   field 3: feature tokens, each passed to Sv::SparseVector::from_s
 *   field 4: not parsed yet (TODO); reading stops at its delimiter
 *
 * For the k-th non-terminal on the target side (k counted from 1),
 * (k, its index) is inserted into r->order. A target non-terminal with
 * an empty symbol takes the symbol of the RHS non-terminal its index
 * refers to (1-based); if the index does not refer to an existing RHS
 * non-terminal, the symbol is left empty.
 */
void
Rule::from_s(Rule* r, const string& s)
{
  stringstream ss(s);
  size_t field = 0;      // number of "|||" delimiters seen so far
  string buf;
  r->arity = 0;
  size_t index = 1;      // 1-based counter over target-side non-terminals
  vector<NT*> rhs_nt;    // RHS non-terminals, in order of appearance
  // NOTE(review): if Sv::SparseVector is a class template, its arguments
  // have to be spelled out in the two uses below - confirm against its
  // declaration.
  r->f = new Sv::SparseVector();
  while (ss >> buf) {
    if (buf == "|||") {
      field++;
      if (field == 4) break; // A TODO: nothing after this is parsed
      continue;
    }
    if (field == 0) { // LHS
      r->lhs = new NT(buf);
    } else if (field == 1) { // RHS
      Item* item = new Item(buf);
      r->rhs.push_back(item);
      if (item->type == NON_TERMINAL) {
        rhs_nt.push_back(item->nt);
        r->arity++;
      }
    } else if (field == 2) { // TARGET
      Item* item = new Item(buf);
      r->target.push_back(item);
      if (item->type == NON_TERMINAL) {
        NT* target_nt = item->nt;
        r->order.insert(make_pair(index, target_nt->index));
        if (target_nt->symbol == "") {
          // only look up the RHS symbol when the reference is in range;
          // an index of 0 or one past the RHS non-terminals would read
          // outside rhs_nt
          const size_t ref = static_cast<size_t>(target_nt->index);
          if (ref >= 1 && ref <= rhs_nt.size())
            target_nt->symbol = rhs_nt[ref - 1]->symbol;
        }
        index++;
      }
    } else { // field == 3: F TODO
      Sv::SparseVector::from_s(r->f, buf); // FIXME this is slow!!!
    }
  }
}
/*
 * Debug representation of the form
 *   Rule<lhs:..., rhs:{...}, target:{...}, f:..., arity=N, map:TODO>
 * where rhs and target list the repr() of their items separated by
 * single spaces.
 */
string
Rule::repr() const
{
  ostringstream os;
  os << "Rule<lhs:" << lhs->repr() << ", rhs:{";
  for (auto it = rhs.begin(); it != rhs.end(); ++it) {
    os << (**it).repr();
    if (next(it) != rhs.end()) os << " ";
  }
  os << "}, target:{";
  for (auto it = target.begin(); it != target.end(); ++it) {
    os << (**it).repr();
    if (next(it) != target.end()) os << " ";
  }
  os << "}, f:" << f->repr()
     << ", arity=" << arity
     << ", map:" << "TODO"
     << ">";
  return os.str();
}
/*
 * Textual rule form: LHS, RHS items, target items, features and an
 * alignment placeholder, joined by " ||| ". Items within the RHS and
 * the target are separated by single spaces.
 */
string
Rule::escaped() const
{
  ostringstream out;
  out << lhs->escaped();
  out << " ||| ";

  bool first = true;
  for (const auto& item : rhs) {
    if (!first) out << " ";
    out << item->escaped();
    first = false;
  }
  out << " ||| ";

  first = true;
  for (const auto& item : target) {
    if (!first) out << " ";
    out << item->escaped();
    first = false;
  }
  out << " ||| ";

  out << f->escaped();
  out << " ||| ";
  out << "TODO(alignment)";
  return out.str();
}
/* Streams the repr() form of a rule. */
ostream&
operator<<(ostream& os, const Rule& r)
{
  os << r.repr();
  return os;
}
/*
* G::Grammar
*
*/
/*
 * Reads a grammar from the file fn, one rule per line.
 *
 * Every parsed rule is appended to rules and additionally to exactly one
 * of:
 *   flat      rules with arity 0
 *   start_nt  rules with arity > 0 whose first RHS item is a non-terminal
 *   start_t   rules with arity > 0 whose first RHS item is a terminal
 *
 * Lines that contain only whitespace are skipped. If the file cannot be
 * read, no rules are added.
 */
Grammar::Grammar(const string& fn)
{
  ifstream ifs(fn);
  string line;
  while (getline(ifs, line)) {
    // A line without any token would produce a Rule whose lhs is never
    // assigned (Rule::from_s only sets it after reading a first token).
    if (line.find_first_not_of(" \t\n\v\f\r") == string::npos)
      continue;
    G::Rule* r = new G::Rule(line);
    rules.push_back(r);
    if (r->arity == 0)
      flat.push_back(r);
    else if (r->rhs.front()->type == NON_TERMINAL)
      start_nt.push_back(r);
    else
      start_t.push_back(r);
  }
}
/* Streams the repr() form of every rule in g.rules, one per line. */
ostream&
operator<<(ostream& os, const Grammar& g)
{
  for (auto it = g.rules.begin(); it != g.rules.end(); ++it) {
    os << (*it)->repr();
    os << endl;
  }
  return os;
}
} // namespace G