summaryrefslogtreecommitdiff
path: root/algorithms
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-10-10 14:20:59 +0100
committerPatrick Simianer <p@simianer.de>2014-10-10 14:20:59 +0100
commitc0ece722bd7775af14c7f2ec18dcd1fd138607cb (patch)
treefab93a5d48d89c9e3c7473a1ca5aafe7b3bcaeed /algorithms
parentde54ca7c31487a33fa8148159e9b3dea96a9dd4c (diff)
algorithms: shift reduce dependency parsing
Diffstat (limited to 'algorithms')
-rw-r--r--algorithms/dependency_structures.cc382
-rw-r--r--algorithms/shift_reduce_parser.cc127
-rwxr-xr-xalgorithms/shift_reduce_parser.rb101
-rw-r--r--algorithms/shift_reduce_parser_data8
4 files changed, 618 insertions, 0 deletions
diff --git a/algorithms/dependency_structures.cc b/algorithms/dependency_structures.cc
new file mode 100644
index 0000000..cd84d2a
--- /dev/null
+++ b/algorithms/dependency_structures.cc
@@ -0,0 +1,382 @@
+#include <iostream>
+#include <vector>
+#include <map>
+#include <algorithm>
+#include <sstream>
+
+using namespace std;
+
+
+typedef char Word;
+struct Node;
+typedef vector<Node> Structure;
+
+struct Node {
+ Word word;
+ size_t id;
+
+ Structure left;
+ Structure right;
+};
+
+struct StackItem {
+ char state;
+
+ Structure d;
+};
+
+/*
+ * @brief Find node with given id below Node n.
+ * @detail That's variant of BFS
+ *
+ */
+Node*
+find_in_node(Node* n, size_t const id)
+{
+ vector<Node*> queue;
+ queue.push_back(n);
+ while (!queue.empty()) {
+ Node* cur = queue.back();
+ queue.pop_back();
+ if (id == cur->id) {
+ return cur;
+ } else if (id < cur->id) {
+ for (size_t i = 0; i < cur->left.size(); i++)
+ queue.push_back(&(cur->left[i]));
+ } else if (id > cur->id) {
+ for (size_t i = 0; i < cur->right.size(); i++)
+ queue.push_back(&(cur->right[i]));
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * @brief Pretty prints a structure.
+ *
+ */
+ostream&
+print_structure(ostream& os, Structure& s, size_t level=0, char side='-')
+{
+ ostringstream indent;
+ for (size_t i = 0; i <= level; i++)
+ indent << " ";
+
+ if (side == 'r' || side == '-') {
+ for (int i = s.size()-1; i >= 0; i--) { // sorted by id
+ os << indent.str() << s[i].id << "," << s[i].word << " " << side << endl;
+ print_structure(os, s[i].left, level+1, 'l');
+ print_structure(os, s[i].right, level+1, 'r');
+ }
+ } else {
+ for (size_t i = 0; i < s.size(); i++) { // sorted by id
+ os << indent.str() << s[i].id << "," << s[i].word << " " << side << endl;
+ print_structure(os, s[i].left, level+1, 'l');
+ print_structure(os, s[i].right, level+1, 'r');
+ }
+ }
+
+ return os;
+}
+
+/*
+ * @brief Fills dependency structure given words and
+ * DependencyInfo.
+ *
+ */
+void
+make_structure(Structure& s, vector<Word>& words, vector<int> links)
+{
+ // create nodes without hierarchy
+ vector<Node> all, queue, sorted;
+ size_t id = 0;
+ for (vector<Word>::iterator it = words.begin();
+ it != words.end(); it++) {
+ Word word = *it;
+ Node n;
+ n.id = id;
+ n.word = word;
+ all.push_back(n);
+ if (links[id] == -1)
+ queue.push_back(n);
+ id++;
+ }
+
+ // topologically sort before construction
+ while (!queue.empty()) {
+ Node n = queue.back();
+ queue.pop_back();
+ sorted.push_back(n);
+ size_t i = 0;
+ for (size_t j = 0; j < links.size(); j++) {
+ if (links[j] == n.id)
+ queue.push_back(all[i]);
+ i++;
+ }
+ }
+
+ // build structure/s
+ for (vector<Node>::iterator n = sorted.begin();
+ n != sorted.end(); n++) {
+ if (links[n->id] == -1) { // top-level
+ s.push_back(*n);
+ } else {
+ Node* a;
+ for (size_t k = 0; k < s.size(); k++) {
+ a = find_in_node(&(s[k]), links[n->id]);
+ if (a) {
+ if (n->id < a->id) { // left
+ a->left.push_back(*n);
+ } else { // right
+ a->right.push_back(*n);
+ }
+ break;
+ }
+ }
+ }
+ }
+}
+
+void
+score_and_prune(Node* n, bool final=false)
+{
+ if (n->left.size() == 0
+ && n->right.size() == 0) return;
+
+ for (int i = n->left.size()-1; i >= 0; i--)
+ cout << n->left[i].word;
+ cout << "{" << n->word;
+ if (final) cout << "!";
+ cout << "}";
+ for (int i = n->right.size()-1; i >= 0; i--)
+ cout << n->right[i].word;
+ cout << endl;
+
+ for (int i = 0; i < n->left.size(); i++)
+ score_and_prune(&(n->left[i]));
+ for (int i = 0; i < n->right.size(); i++)
+ score_and_prune(&(n->right[i]));
+
+ n->left.clear();
+ n->right.clear();
+}
+
+/*
+ * where = left or right
+ *
+ */
+void
+reduce(Node* head, Structure& a, char where)
+{
+ for (Structure::iterator it = a.begin();
+ it != a.end(); it++) {
+ if (where == 'l')
+ head->left.push_back(*it);
+ else // where == 'r'
+ head->right.push_back(*it);
+ }
+}
+
+void
+test1()
+{
+ cerr << "TEST 1" << endl;
+ vector<Word> words;
+ words.push_back('e');
+ words.push_back('f');
+ words.push_back('g');
+ words.push_back('x');
+ words.push_back('h');
+ words.push_back('i');
+ vector<int> links;
+ links.push_back(4);
+ links.push_back(0);
+ links.push_back(0);
+ links.push_back(2);
+ links.push_back(-1);
+ links.push_back(-1);
+ Structure s;
+ make_structure(s, words, links);
+ print_structure(cout, s);
+ cerr << endl;
+}
+
+void
+test2()
+{
+ cerr << "TEST 2" << endl;
+ vector<Word> words;
+ words.push_back('e');
+ words.push_back('f');
+ words.push_back('x');
+ words.push_back('g');
+ words.push_back('h');
+ words.push_back('i');
+ vector<int> links;
+ links.push_back(4);
+ links.push_back(0);
+ links.push_back(3);
+ links.push_back(0);
+ links.push_back(-1);
+ links.push_back(-1);
+ Structure s;
+ make_structure(s, words, links);
+ print_structure(cout, s);
+ cerr << endl;
+}
+
+void
+test3()
+{
+ cerr << "TEST 3" << endl;
+
+ vector<Word> w1;
+ w1.push_back('a');
+ vector<int> l1;
+ l1.push_back(-1);
+ Structure s1;
+ make_structure(s1, w1, l1);
+ print_structure(cout, s1);
+ cout << endl;
+
+ vector<Word> w2;
+ w2.push_back('b');
+ w2.push_back('c');
+ vector<int> l2;
+ l2.push_back(-1);
+ l2.push_back(-1);
+ Structure s2;
+ make_structure(s2, w2, l2);
+ print_structure(cout, s2);
+ cout << endl;
+
+ vector<Word> w3;
+ w3.push_back('d');
+ vector<int> l3;
+ l3.push_back(-1);
+ Structure s3;
+ make_structure(s3, w3, l3);
+ print_structure(cout, s3);
+ cout << endl;
+
+ vector<Word> w4;
+ w4.push_back('e');
+ w4.push_back('f');
+ w4.push_back('g');
+ vector<int> l4;
+ l4.push_back(-1);
+ l4.push_back(0);
+ l4.push_back(0);
+ Structure s4;
+ make_structure(s4, w4, l4);
+ print_structure(cout, s4);
+ cout << endl;
+
+ vector<Word> w5;
+ w5.push_back('h');
+ vector<int> l5;
+ l5.push_back(-1);
+ Structure s5;
+ make_structure(s5, w5, l5);
+ print_structure(cout, s5);
+ cout << endl;
+}
+
+void
+test4()
+{
+ cerr << "TEST 3" << endl;
+ vector<Word> words;
+ words.push_back('a');
+ words.push_back('b');
+ words.push_back('c');
+ words.push_back('d');
+ words.push_back('e');
+ words.push_back('f');
+ words.push_back('g');
+ words.push_back('h');
+ words.push_back('i');
+ vector<int> links;
+ links.push_back(7);
+ links.push_back(3);
+ links.push_back(3);
+ links.push_back(7);
+ links.push_back(7);
+ links.push_back(4);
+ links.push_back(4);
+ links.push_back(-1);
+ links.push_back(-1);
+ Structure s;
+ make_structure(s, words, links);
+ print_structure(cout, s);
+ score_and_prune(&(s.back()), true);
+ cerr << endl;
+}
+
+void
+test5()
+{
+ cerr << "TEST 5" << endl;
+
+ vector<Word> w2;
+ w2.push_back('b');
+ w2.push_back('c');
+ vector<int> l2;
+ l2.push_back(-1);
+ l2.push_back(-1);
+ Structure s2;
+ make_structure(s2, w2, l2);
+ print_structure(cout, s2);
+ cout << endl;
+
+ vector<Word> w3;
+ w3.push_back('d');
+ vector<int> l3;
+ l3.push_back(-1);
+ Structure s3;
+ make_structure(s3, w3, l3);
+ print_structure(cout, s3);
+ cout << endl;
+
+ vector<Word> w4;
+ w4.push_back('e');
+ w4.push_back('f');
+ w4.push_back('g');
+ vector<int> l4;
+ l4.push_back(-1);
+ l4.push_back(0);
+ l4.push_back(0);
+ Structure s4;
+ make_structure(s4, w4, l4);
+ print_structure(cout, s4);
+ cout << endl;
+
+ reduce(&(s3.back()), s2, 'l');
+ print_structure(cout, s3);
+ cerr << endl;
+
+ reduce(&(s3.back()), s4, 'r');
+ print_structure(cout, s3);
+ cerr << endl;
+
+ score_and_prune(&(s3.back().right[0]));
+ cerr << endl;
+ print_structure(cout, s3);
+ cerr << endl;
+ score_and_prune(&(s3.back()));
+ cerr << endl;
+}
+
+int
+main(void)
+{
+ test1();
+ test2();
+ test3();
+ test4();
+ test5();
+
+ return 0;
+}
+
diff --git a/algorithms/shift_reduce_parser.cc b/algorithms/shift_reduce_parser.cc
new file mode 100644
index 0000000..563c8e4
--- /dev/null
+++ b/algorithms/shift_reduce_parser.cc
@@ -0,0 +1,127 @@
+#include <vector>
+#include <map>
+#include <utility>
+#include <vector>
+#include <iostream>
+
+
+using namespace std;
+
+typedef std::pair<char,char> Key;
+
+struct E
+{
+ vector<char> h;
+
+ Key get() {
+ char second = h.back();
+ h.pop_back();
+ char first = h.back();
+ h.pop_back();
+ return make_pair(first, second);
+ }
+ bool good() { return h.size() > 1; }
+ char add(char c) { h.push_back(c); }
+};
+
+struct P
+{
+ std::map<Key, char> actionTable_;
+
+ P()
+ {
+ createActionTable();
+ }
+
+ void r(E& e)
+ {
+ cout << "before: ";
+ for (auto it: e.h)
+ cout << it;
+ cout << endl;
+ bool stop = false;
+ while (e.good() && !stop) {
+ pair<char,char> k = e.get();
+ char action = actionTable_[k];
+ switch (action) {
+ case 'L':
+ e.add('f');
+ break;
+ case 'R':
+ e.add('f');
+ stop = true;
+ break;
+ case 'S':
+ e.add(k.first);
+ e.add(k.second);
+ stop = true;
+ break;
+ case 'I':
+ default:
+ e.add('u');
+ stop = true;
+ break;
+ }
+ }
+ cout << " after: ";
+ for (auto it: e.h)
+ cout << it;
+ cout << endl;
+ cout << "--" << endl;
+ }
+
+ void createActionTable()
+ {
+ // u ->
+ actionTable_[Key('u','u')] = 'S'; // Shift
+ actionTable_[Key('u','f')] = 'S';
+ actionTable_[Key('u','l')] = 'S';
+ actionTable_[Key('u','r')] = 'I'; // Illegal
+
+ // f ->
+ actionTable_[Key('f','f')] = 'S'; // ambiguous
+ actionTable_[Key('f','l')] = 'S';
+ actionTable_[Key('f','r')] = 'R'; // Right-reduce
+
+ // l ->
+ actionTable_[Key('l','f')] = 'L'; // Left-reduce
+ actionTable_[Key('l','l')] = 'S';
+ actionTable_[Key('l','r')] = 'I';
+
+ // r ->
+ actionTable_[Key('r','f')] = 'I';
+ actionTable_[Key('r','l')] = 'I';
+ actionTable_[Key('r','r')] = 'I';
+ }
+};
+
+
+int main(void)
+{
+ E e;
+ P p;
+
+ /*e.add('u');
+ p.r(e);
+
+ e.add('l');
+ p.r(e);
+
+ e.add('l');
+ p.r(e);
+
+ e.add('f');
+ p.r(e);
+
+ e.add('r');
+ p.r(e);*/
+
+ string s("frllllfrlllllllf");
+ for (auto it: s) {
+ e.add(it);
+ p.r(e);
+ }
+
+ return 0;
+}
+
diff --git a/algorithms/shift_reduce_parser.rb b/algorithms/shift_reduce_parser.rb
new file mode 100755
index 0000000..948d8c1
--- /dev/null
+++ b/algorithms/shift_reduce_parser.rb
@@ -0,0 +1,101 @@
+#!/usr/bin/env ruby
+
+
+class Item
+ attr_accessor :word, :type, :link
+ def initialize word, type
+ @word = word
+ @type = type
+ @link = nil
+ end
+end
+
+class MockShiftReduceParser
+ attr_accessor :state
+
+ def initialize
+ @state = nil
+
+ @a = {}
+ @a[nil] = {}
+ @a[nil][nil] = 'S'
+ @a[nil]['f'] = 'S'
+ @a[nil]['l'] = 'S'
+ @a[nil]['r'] = 'illegal'
+
+ @a['f'] = {}
+ @a['f']['f'] = 'S' # FIXME conflict
+ @a['f']['l'] = 'S'
+ @a['f']['r'] = 'R'
+
+ @a['l'] = {}
+ @a['l']['f'] = 'L'
+ @a['l']['l'] = 'S'
+ @a['l']['r'] = 'illegal'
+
+ @a['r'] = {}
+ @a['r']['f'] = 'illegal'
+ @a['r']['l'] = 'illegal'
+ @a['r']['r'] = 'illegal'
+ end
+
+ def advance type
+ if !@state
+ @state = type
+ puts "-/- -> S -> #{@state}"
+ return
+ end
+ action = @a[@state][type]
+ prev_state = @state
+ if action == 'S'
+ @state = type
+ elsif action == 'L' or action == 'R'
+ @state = 'f'
+ end
+ puts "#{prev_state}/#{type} -> #{action} -> #{@state}"
+
+ return action
+ end
+end
+
+
+parser = MockShiftReduceParser.new
+
+items = []
+head = nil
+stack = []
+
+while line = STDIN.gets
+ word, type = line.split
+ item = Item.new word, type
+ items << item
+ stack << item
+
+ head = item if !head
+
+ action = parser.advance item.type
+ if action == "S"
+ head = item
+ elsif action == "R"
+ _ = stack.pop
+ _.link = head
+ elsif action == "L"
+ head = item
+ puts stack.size
+ del = []
+ stack.each_with_index { |i,j|
+ if i.type == 'l'
+ i.link = head
+ del << j
+ end
+ }
+ del.reverse.each { |d| stack.delete_at(d) }
+ puts stack.size
+ end
+end
+
+stack.each { |i| i.link = head }
+
+puts "---"
+items.each { |j| if j.link then puts "#{j.word} #{j.link.word}" else puts "#{j.word} ?" end }
+
diff --git a/algorithms/shift_reduce_parser_data b/algorithms/shift_reduce_parser_data
new file mode 100644
index 0000000..c4714ab
--- /dev/null
+++ b/algorithms/shift_reduce_parser_data
@@ -0,0 +1,8 @@
+a f
+b l
+c l
+d f
+e f
+f r
+g r
+h f