From c0ece722bd7775af14c7f2ec18dcd1fd138607cb Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 10 Oct 2014 14:20:59 +0100 Subject: algorithms: shift reduce dependency parsing --- algorithms/dependency_structures.cc | 382 ++++++++++++++++++++++++++++++++++++ algorithms/shift_reduce_parser.cc | 127 ++++++++++++ algorithms/shift_reduce_parser.rb | 101 ++++++++++ algorithms/shift_reduce_parser_data | 8 + 4 files changed, 618 insertions(+) create mode 100644 algorithms/dependency_structures.cc create mode 100644 algorithms/shift_reduce_parser.cc create mode 100755 algorithms/shift_reduce_parser.rb create mode 100644 algorithms/shift_reduce_parser_data (limited to 'algorithms') diff --git a/algorithms/dependency_structures.cc b/algorithms/dependency_structures.cc new file mode 100644 index 0000000..cd84d2a --- /dev/null +++ b/algorithms/dependency_structures.cc @@ -0,0 +1,382 @@ +#include +#include +#include +#include +#include + +using namespace std; + + +typedef char Word; +struct Node; +typedef vector Structure; + +struct Node { + Word word; + size_t id; + + Structure left; + Structure right; +}; + +struct StackItem { + char state; + + Structure d; +}; + +/* + * @brief Find node with given id below Node n. + * @detail That's variant of BFS + * + */ +Node* +find_in_node(Node* n, size_t const id) +{ + vector queue; + queue.push_back(n); + while (!queue.empty()) { + Node* cur = queue.back(); + queue.pop_back(); + if (id == cur->id) { + return cur; + } else if (id < cur->id) { + for (size_t i = 0; i < cur->left.size(); i++) + queue.push_back(&(cur->left[i])); + } else if (id > cur->id) { + for (size_t i = 0; i < cur->right.size(); i++) + queue.push_back(&(cur->right[i])); + } + } + + return NULL; +} + +/* + * @brief Pretty prints a structure. + * + */ +ostream& +print_structure(ostream& os, Structure& s, size_t level=0, char side='-') +{ + ostringstream indent; + for (size_t i = 0; i <= level; i++) + indent << " "; + + if (side == 'r' || side == '-') { + for (int i = s.size()-1; i >= 0; i--) { // sorted by id + os << indent.str() << s[i].id << "," << s[i].word << " " << side << endl; + print_structure(os, s[i].left, level+1, 'l'); + print_structure(os, s[i].right, level+1, 'r'); + } + } else { + for (size_t i = 0; i < s.size(); i++) { // sorted by id + os << indent.str() << s[i].id << "," << s[i].word << " " << side << endl; + print_structure(os, s[i].left, level+1, 'l'); + print_structure(os, s[i].right, level+1, 'r'); + } + } + + return os; +} + +/* + * @brief Fills dependency structure given words and + * DependencyInfo. + * + */ +void +make_structure(Structure& s, vector& words, vector links) +{ + // create nodes without hierarchy + vector all, queue, sorted; + size_t id = 0; + for (vector::iterator it = words.begin(); + it != words.end(); it++) { + Word word = *it; + Node n; + n.id = id; + n.word = word; + all.push_back(n); + if (links[id] == -1) + queue.push_back(n); + id++; + } + + // topologically sort before construction + while (!queue.empty()) { + Node n = queue.back(); + queue.pop_back(); + sorted.push_back(n); + size_t i = 0; + for (size_t j = 0; j < links.size(); j++) { + if (links[j] == n.id) + queue.push_back(all[i]); + i++; + } + } + + // build structure/s + for (vector::iterator n = sorted.begin(); + n != sorted.end(); n++) { + if (links[n->id] == -1) { // top-level + s.push_back(*n); + } else { + Node* a; + for (size_t k = 0; k < s.size(); k++) { + a = find_in_node(&(s[k]), links[n->id]); + if (a) { + if (n->id < a->id) { // left + a->left.push_back(*n); + } else { // right + a->right.push_back(*n); + } + break; + } + } + } + } +} + +void +score_and_prune(Node* n, bool final=false) +{ + if (n->left.size() == 0 + && n->right.size() == 0) return; + + for (int i = n->left.size()-1; i >= 0; i--) + cout << n->left[i].word; + cout << "{" << n->word; + if (final) cout << "!"; + cout << "}"; + for (int i = n->right.size()-1; i >= 0; i--) + cout << n->right[i].word; + cout << endl; + + for (int i = 0; i < n->left.size(); i++) + score_and_prune(&(n->left[i])); + for (int i = 0; i < n->right.size(); i++) + score_and_prune(&(n->right[i])); + + n->left.clear(); + n->right.clear(); +} + +/* + * where = left or right + * + */ +void +reduce(Node* head, Structure& a, char where) +{ + for (Structure::iterator it = a.begin(); + it != a.end(); it++) { + if (where == 'l') + head->left.push_back(*it); + else // where == 'r' + head->right.push_back(*it); + } +} + +void +test1() +{ + cerr << "TEST 1" << endl; + vector words; + words.push_back('e'); + words.push_back('f'); + words.push_back('g'); + words.push_back('x'); + words.push_back('h'); + words.push_back('i'); + vector links; + links.push_back(4); + links.push_back(0); + links.push_back(0); + links.push_back(2); + links.push_back(-1); + links.push_back(-1); + Structure s; + make_structure(s, words, links); + print_structure(cout, s); + cerr << endl; +} + +void +test2() +{ + cerr << "TEST 2" << endl; + vector words; + words.push_back('e'); + words.push_back('f'); + words.push_back('x'); + words.push_back('g'); + words.push_back('h'); + words.push_back('i'); + vector links; + links.push_back(4); + links.push_back(0); + links.push_back(3); + links.push_back(0); + links.push_back(-1); + links.push_back(-1); + Structure s; + make_structure(s, words, links); + print_structure(cout, s); + cerr << endl; +} + +void +test3() +{ + cerr << "TEST 3" << endl; + + vector w1; + w1.push_back('a'); + vector l1; + l1.push_back(-1); + Structure s1; + make_structure(s1, w1, l1); + print_structure(cout, s1); + cout << endl; + + vector w2; + w2.push_back('b'); + w2.push_back('c'); + vector l2; + l2.push_back(-1); + l2.push_back(-1); + Structure s2; + make_structure(s2, w2, l2); + print_structure(cout, s2); + cout << endl; + + vector w3; + w3.push_back('d'); + vector l3; + l3.push_back(-1); + Structure s3; + make_structure(s3, w3, l3); + print_structure(cout, s3); + cout << endl; + + vector w4; + w4.push_back('e'); + w4.push_back('f'); + w4.push_back('g'); + vector l4; + l4.push_back(-1); + l4.push_back(0); + l4.push_back(0); + Structure s4; + make_structure(s4, w4, l4); + print_structure(cout, s4); + cout << endl; + + vector w5; + w5.push_back('h'); + vector l5; + l5.push_back(-1); + Structure s5; + make_structure(s5, w5, l5); + print_structure(cout, s5); + cout << endl; +} + +void +test4() +{ + cerr << "TEST 3" << endl; + vector words; + words.push_back('a'); + words.push_back('b'); + words.push_back('c'); + words.push_back('d'); + words.push_back('e'); + words.push_back('f'); + words.push_back('g'); + words.push_back('h'); + words.push_back('i'); + vector links; + links.push_back(7); + links.push_back(3); + links.push_back(3); + links.push_back(7); + links.push_back(7); + links.push_back(4); + links.push_back(4); + links.push_back(-1); + links.push_back(-1); + Structure s; + make_structure(s, words, links); + print_structure(cout, s); + score_and_prune(&(s.back()), true); + cerr << endl; +} + +void +test5() +{ + cerr << "TEST 5" << endl; + + vector w2; + w2.push_back('b'); + w2.push_back('c'); + vector l2; + l2.push_back(-1); + l2.push_back(-1); + Structure s2; + make_structure(s2, w2, l2); + print_structure(cout, s2); + cout << endl; + + vector w3; + w3.push_back('d'); + vector l3; + l3.push_back(-1); + Structure s3; + make_structure(s3, w3, l3); + print_structure(cout, s3); + cout << endl; + + vector w4; + w4.push_back('e'); + w4.push_back('f'); + w4.push_back('g'); + vector l4; + l4.push_back(-1); + l4.push_back(0); + l4.push_back(0); + Structure s4; + make_structure(s4, w4, l4); + print_structure(cout, s4); + cout << endl; + + reduce(&(s3.back()), s2, 'l'); + print_structure(cout, s3); + cerr << endl; + + reduce(&(s3.back()), s4, 'r'); + print_structure(cout, s3); + cerr << endl; + + score_and_prune(&(s3.back().right[0])); + cerr << endl; + print_structure(cout, s3); + cerr << endl; + score_and_prune(&(s3.back())); + cerr << endl; +} + +int +main(void) +{ + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} + diff --git a/algorithms/shift_reduce_parser.cc b/algorithms/shift_reduce_parser.cc new file mode 100644 index 0000000..563c8e4 --- /dev/null +++ b/algorithms/shift_reduce_parser.cc @@ -0,0 +1,127 @@ +#include +#include +#include +#include +#include + + +using namespace std; + +typedef std::pair Key; + +struct E +{ + vector h; + + Key get() { + char second = h.back(); + h.pop_back(); + char first = h.back(); + h.pop_back(); + return make_pair(first, second); + } + bool good() { return h.size() > 1; } + char add(char c) { h.push_back(c); } +}; + +struct P +{ + std::map actionTable_; + + P() + { + createActionTable(); + } + + void r(E& e) + { + cout << "before: "; + for (auto it: e.h) + cout << it; + cout << endl; + bool stop = false; + while (e.good() && !stop) { + pair k = e.get(); + char action = actionTable_[k]; + switch (action) { + case 'L': + e.add('f'); + break; + case 'R': + e.add('f'); + stop = true; + break; + case 'S': + e.add(k.first); + e.add(k.second); + stop = true; + break; + case 'I': + default: + e.add('u'); + stop = true; + break; + } + } + cout << " after: "; + for (auto it: e.h) + cout << it; + cout << endl; + cout << "--" << endl; + } + + void createActionTable() + { + // u -> + actionTable_[Key('u','u')] = 'S'; // Shift + actionTable_[Key('u','f')] = 'S'; + actionTable_[Key('u','l')] = 'S'; + actionTable_[Key('u','r')] = 'I'; // Illegal + + // f -> + actionTable_[Key('f','f')] = 'S'; // ambiguous + actionTable_[Key('f','l')] = 'S'; + actionTable_[Key('f','r')] = 'R'; // Right-reduce + + // l -> + actionTable_[Key('l','f')] = 'L'; // Left-reduce + actionTable_[Key('l','l')] = 'S'; + actionTable_[Key('l','r')] = 'I'; + + // r -> + actionTable_[Key('r','f')] = 'I'; + actionTable_[Key('r','l')] = 'I'; + actionTable_[Key('r','r')] = 'I'; + } +}; + + +int main(void) +{ + E e; + P p; + + /*e.add('u'); + p.r(e); + + e.add('l'); + p.r(e); + + e.add('l'); + p.r(e); + + e.add('f'); + p.r(e); + + e.add('r'); + p.r(e);*/ + + string s("frllllfrlllllllf"); + for (auto it: s) { + e.add(it); + p.r(e); + } + + return 0; +} + diff --git a/algorithms/shift_reduce_parser.rb b/algorithms/shift_reduce_parser.rb new file mode 100755 index 0000000..948d8c1 --- /dev/null +++ b/algorithms/shift_reduce_parser.rb @@ -0,0 +1,101 @@ +#!/usr/bin/env ruby + + +class Item + attr_accessor :word, :type, :link + def initialize word, type + @word = word + @type = type + @link = nil + end +end + +class MockShiftReduceParser + attr_accessor :state + + def initialize + @state = nil + + @a = {} + @a[nil] = {} + @a[nil][nil] = 'S' + @a[nil]['f'] = 'S' + @a[nil]['l'] = 'S' + @a[nil]['r'] = 'illegal' + + @a['f'] = {} + @a['f']['f'] = 'S' # FIXME conflict + @a['f']['l'] = 'S' + @a['f']['r'] = 'R' + + @a['l'] = {} + @a['l']['f'] = 'L' + @a['l']['l'] = 'S' + @a['l']['r'] = 'illegal' + + @a['r'] = {} + @a['r']['f'] = 'illegal' + @a['r']['l'] = 'illegal' + @a['r']['r'] = 'illegal' + end + + def advance type + if !@state + @state = type + puts "-/- -> S -> #{@state}" + return + end + action = @a[@state][type] + prev_state = @state + if action == 'S' + @state = type + elsif action == 'L' or action == 'R' + @state = 'f' + end + puts "#{prev_state}/#{type} -> #{action} -> #{@state}" + + return action + end +end + + +parser = MockShiftReduceParser.new + +items = [] +head = nil +stack = [] + +while line = STDIN.gets + word, type = line.split + item = Item.new word, type + items << item + stack << item + + head = item if !head + + action = parser.advance item.type + if action == "S" + head = item + elsif action == "R" + _ = stack.pop + _.link = head + elsif action == "L" + head = item + puts stack.size + del = [] + stack.each_with_index { |i,j| + if i.type == 'l' + i.link = head + del << j + end + } + del.reverse.each { |d| stack.delete_at(d) } + puts stack.size + end +end + +stack.each { |i| i.link = head } + +puts "---" +items.each { |j| if j.link then puts "#{j.word} #{j.link.word}" else puts "#{j.word} ?" end } + diff --git a/algorithms/shift_reduce_parser_data b/algorithms/shift_reduce_parser_data new file mode 100644 index 0000000..c4714ab --- /dev/null +++ b/algorithms/shift_reduce_parser_data @@ -0,0 +1,8 @@ +a f +b l +c l +d f +e f +f r +g r +h f -- cgit v1.2.3