summaryrefslogtreecommitdiff
path: root/fast/grammar.hh
diff options
context:
space:
mode:
Diffstat (limited to 'fast/grammar.hh')
-rw-r--r--fast/grammar.hh316
1 files changed, 0 insertions, 316 deletions
diff --git a/fast/grammar.hh b/fast/grammar.hh
deleted file mode 100644
index 4906c46..0000000
--- a/fast/grammar.hh
+++ /dev/null
@@ -1,316 +0,0 @@
-#pragma once
-
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include <msgpack.hpp>
-
-#include "sparse_vector.hh"
-#include "util.hh"
-
-using namespace std;
-
-
-namespace G {
-
-enum item_type {
- UNDEFINED,
- TERMINAL,
- NON_TERMINAL
-};
-
-struct Item {
- virtual size_t index() const { return 0; }
- virtual symbol_t symbol() const { return ""; }
- virtual item_type type() const { return UNDEFINED; }
-
- virtual ostream& repr(ostream& os) const { return os << "Item<>"; }
- virtual ostream& escaped(ostream& os) const { return os << ""; }
-
- friend ostream&
- operator<<(ostream& os, const Item& i)
- {
- return i.repr(os);
- };
-};
-
-struct NT : public Item {
- symbol_t symbol_;
- size_t index_;
-
- NT() {}
-
- NT(string const& s)
- {
- index_ = 0; // default
- string t(s);
- t.erase(0, 1); t.pop_back(); // remove '[' and ']'
- istringstream ss(t);
- if (ss >> index_) { // [i]
- symbol_ = "";
- index_ = stoi(s);
- return;
- } else {
- ss.clear();
- string buf;
- size_t j = 0;
- while (ss.good() && getline(ss, buf, ',')) {
- if (j == 0) {
- symbol_ = buf;
- } else {
- index_ = stoi(buf);
- }
- j++;
- }
- }
- }
-
- virtual size_t index() const { return index_; }
- virtual symbol_t symbol() const { return symbol_; }
- virtual item_type type() const { return NON_TERMINAL; }
-
- virtual ostream&
- repr(ostream& os) const
- {
- return os << "NT<" << symbol_ << "," << index_ << ">";
- }
-
- virtual ostream&
- escaped(ostream& os) const
- {
- os << "[" << symbol_;
- if (index_ > 0)
- os << "," << index_;
- os << "]";
-
- return os;
- }
-};
-
-struct T : public Item {
- symbol_t symbol_;
-
- T(string const& s)
- {
- symbol_ = s;
- }
-
- virtual symbol_t symbol() const { return symbol_; }
- virtual item_type type() const { return TERMINAL; }
-
- virtual ostream&
- repr(ostream& os) const
- {
- return os << "T<" << symbol_ << ">";
- }
-
- virtual ostream&
- escaped(ostream& os) const
- {
- os << util::json_escape(symbol_);
- }
-};
-
-struct Vocabulary
-{
- unordered_map<symbol_t, size_t> m_;
- vector<Item*> items_;
-
- bool
- is_non_terminal(string const& s)
- {
- return s.front() == '[' && s.back() == ']';
- }
-
- Item*
- get(symbol_t const& s)
- {
- if (is_non_terminal(s))
- return new NT(s);
- if (m_.find(s) != m_.end())
- return items_[m_[s]];
- return add(s);
- }
-
- Item*
- add(symbol_t const& s)
- {
- size_t next_index_ = items_.size();
- T* item = new T(s);
- items_.push_back(item);
- m_[s] = next_index_;
-
- return item;
- }
-};
-
-struct Rule {
- NT* lhs;
- vector<Item*> rhs;
- vector<Item*> target;
- size_t arity;
-Sv::SparseVector<string, score_t>* f;
- map<size_t, size_t> order;
- string as_str_;
-
- Rule() {}
-
- Rule(string const& s, Vocabulary& vocab) { from_s(this, s, vocab); }
-
- static void
- from_s(Rule* r, string const& s, Vocabulary& vocab)
- {
- istringstream ss(s);
- string buf;
- size_t j = 0, i = 1;
- r->arity = 0;
- vector<NT*> rhs_non_terminals;
- r->f = new Sv::SparseVector<string, score_t>();
- while (ss >> buf) {
- if (buf == "|||") { j++; continue; }
- if (j == 0) { // left-hand side
- r->lhs = new NT(buf);
- } else if (j == 1) { // right-hand side
- Item* item = vocab.get(buf);
- r->rhs.push_back(item);
- if (item->type() == NON_TERMINAL) {
- r->arity++;
- rhs_non_terminals.push_back(reinterpret_cast<NT*>(item));
- }
- } else if (j == 2) { // target
- Item* item = vocab.get(buf);
- if (item->type() == NON_TERMINAL) {
- r->order.insert(make_pair(i, item->index()));
- i++;
- if (item->symbol() == "") { // only [1], [2] ... on target
- reinterpret_cast<NT*>(item)->symbol_ = \
- rhs_non_terminals[item->index()-1]->symbol();
- }
- }
- r->target.push_back(item);
- } else if (j == 3) { // feature vector
- Sv::SparseVector<string, score_t>::from_s(r->f, buf);
- // FIXME: this is slow!!! ^^^
- } else if (j == 4) { // alignment
- } else {
- // error
- }
- if (j == 4) break;
- }
- }
-
- ostream&
- repr(ostream& os) const
- {
- os << "Rule<lhs=";
- lhs->repr(os);
- os << ", rhs:{";
- for (auto it = rhs.begin(); it != rhs.end(); it++) {
- (**it).repr(os);
- if (next(it) != rhs.end()) os << " ";
- }
- os << "}, target:{";
- for (auto it = target.begin(); it != target.end(); it++) {
- (**it).repr(os);
- if (next(it) != target.end()) os << " ";
- }
- os << "}, f:";
- f->repr(os);
- os << ", arity=" << arity << \
- ", order:{";
- for (auto it = order.begin(); it != order.end(); it++) {
- os << it->first << "->" << it->second;
- if (next(it) != order.end()) os << ", ";
- }
- os << "}>";
-
- return os;
- }
-
- ostream&
- escaped(ostream& os) const
- {
- lhs->escaped(os);
- os << " ||| ";
- for (auto it = rhs.begin(); it != rhs.end(); it++) {
- (**it).escaped(os);
- if (next(it) != rhs.end()) os << " ";
- }
- os << " ||| ";
- for (auto it = target.begin(); it != target.end(); it++) {
- (**it).escaped(os);
- if (next(it) != target.end()) os << " ";
- }
- os << " ||| ";
- f->escaped(os);
- os << " ||| " << \
- "TODO";
-
- return os;
- };
-
- friend ostream&
- operator<<(ostream& os, Rule const& r)
- {
- return r.repr(os);
- };
-
- // --
- void
- prep_for_serialization_()
- {
- ostringstream os;
- escaped(os);
- as_str_ = os.str();
- };
- MSGPACK_DEFINE(as_str_);
- // ^^^ FIXME
-};
-
-struct Grammar {
- vector<Rule*> rules;
- vector<Rule*> flat;
- vector<Rule*> start_non_terminal;
- vector<Rule*> start_terminal;
-
- Grammar() {}
-
- Grammar(string const& fn, Vocabulary& vocab)
- {
- ifstream ifs(fn);
- string line;
- while (getline(ifs, line)) {
- G::Rule* r = new G::Rule(line, vocab);
- rules.push_back(r);
- if (r->arity == 0)
- flat.push_back(r);
- else if (r->rhs.front()->type() == NON_TERMINAL)
- start_non_terminal.push_back(r);
- else
- start_terminal.push_back(r);
- }
- }
-
- void add_glue();
- // ^^^ TODO
- void add_pass_through(const string& input);
- // ^^^ TODO
-
- friend ostream&
- operator<<(ostream& os, Grammar const& g)
- {
- for (const auto it: g.rules) {
- it->repr(os);
- os << endl;
- }
-
- return os;
- }
-};
-
-} // namespace G
-