summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-05-02 17:59:32 +0200
committerPatrick Simianer <p@simianer.de>2014-05-02 17:59:32 +0200
commit1503ee164c9f7fc84388f38c014747067a339f36 (patch)
tree4759b139e4346c46a503a00c7443b2af0feaf653
parent2ff2b2a3446f0b1583b77bc5dd926473f95199f6 (diff)
first more or less functional version
-rw-r--r--grammar9
-rw-r--r--grammar.rb96
-rw-r--r--hg.rb5
-rw-r--r--intersect.rb174
4 files changed, 284 insertions, 0 deletions
diff --git a/grammar b/grammar
new file mode 100644
index 0000000..fd4d01c
--- /dev/null
+++ b/grammar
@@ -0,0 +1,9 @@
+[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
+[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0
+[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0
+[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1
+[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1
+[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0
+[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0
+[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0
+[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0
diff --git a/grammar.rb b/grammar.rb
new file mode 100644
index 0000000..02f4270
--- /dev/null
+++ b/grammar.rb
@@ -0,0 +1,96 @@
+require 'nlp_ruby'
+
+class Terminal
+ attr_accessor :w
+
+ def initialize s
+ @w = s
+ end
+
+ def to_s
+ "T<#{@w}>"
+ end
+end
+
+class NonTerminal
+ attr_accessor :sym, :n
+
+ def initialize s, n=1
+ @sym = s
+ @n = n
+ end
+
+ def to_s
+ "NT<#{sym}>"
+ end
+end
+
+class Span
+ attr_accessor :left, :right
+
+ def initialize left=-1, right=-1
+ @left = left
+ @right = right
+ end
+end
+
+class Rule
+ attr_accessor :lhs, :rhs, :span
+
+ def initialize lhs=nil, rhs=nil, span=Span.new
+ @lhs = ''
+ @rhs = []
+ @span = span
+ end
+
+ def to_s
+ "#{lhs} -> #{rhs.map{|i|i.to_s}.join ' '} (#{arity}) (#{@span.left}, #{@span.right})"
+ end
+
+ def arity
+ rhs.reject { |i| i.class == Terminal }.size
+ end
+
+ def from_s s
+ a = splitpipe s, 3
+ @lhs = NonTerminal.new a[0].strip.gsub!(/(\[|\])/, "")
+ a[1].split.each { |i|
+ i.strip!
+ if i[0]=='[' && i[i.size-1] == ']'
+ @rhs << NonTerminal.new(i.gsub!(/(\[|\])/, "").split(',')[0])
+ else
+ @rhs << Terminal.new(i)
+ end
+ }
+ @span = Span.new
+ end
+
+ def self.from_s s
+ r = self.new
+ r.from_s s
+ return r
+ end
+end
+
+class Grammar
+ attr_accessor :rules
+
+ def initialize fn
+ @rules = []
+ l = ReadFile.readlines_strip fn
+ l.each { |i|
+ @rules << Rule.from_s(i)
+ }
+ end
+
+ def to_s
+ s = ''
+ @rules.each { |r| s += r.to_s+"\n" }
+ end
+end
+
+
+
+
+
+
diff --git a/hg.rb b/hg.rb
new file mode 100644
index 0000000..f4a9cc4
--- /dev/null
+++ b/hg.rb
@@ -0,0 +1,5 @@
+require nlp_ruby
+
+def s2lattice s="ich sah ein kleines haus"
+ tokenize s
+end
diff --git a/intersect.rb b/intersect.rb
new file mode 100644
index 0000000..8a583da
--- /dev/null
+++ b/intersect.rb
@@ -0,0 +1,174 @@
+#!/usr/bin/env ruby
+
+require_relative './grammar.rb'
+
+# interescts a WFSA (WFST) with a WSCFG to produce a hypergraph
+
+
+
+class Chart
+ attr_accessor :m
+
+ def initialize input
+ @m = []
+ (input.size+1).times {
+ _ = []
+ (input.size+1).times { _ << [] }
+ @m << _
+ }
+ end
+
+ def at span
+ return @m[span.left][span.right]
+ end
+
+ def at i,j
+ return @m[i][j]
+ end
+end
+
+class Item < Rule
+ attr_accessor :lhs, :rhs, :span, :dot
+
+ def initialize rule, dot=-1
+ @lhs = rule.lhs.dup
+ @rhs = rule.rhs.dup
+ @span = Span.new rule.span.left, rule.span.right
+ @dot = dot
+ if rule.class==Item
+ @dot = rule.dot
+ end
+ end
+
+ def to_s
+ "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} (dot:#{@dot}) (a:#{arity}) (#{@span.left}, #{@span.right})"
+ end
+end
+
+
+
+#def shift
+#end
+#def reduce
+#end
+#def init
+ #axioms,goals etc
+#end
+#def parse
+#end
+
+
+g = Grammar.new 'grammar'
+input = "ich sah ein kleines haus".split.map { |i| Terminal.new i }
+#start = NonTerminal.new 'S'
+
+passive_chart = Chart.new input
+active_chart = Chart.new input
+
+
+
+# 1st row: arity0 rules
+unary_rules = g.rules.reject { |r| r.arity>0 }
+input.each_with_index { |t,j|
+ unary_rules.each { |r|
+ if r.rhs.first.w == t.w
+ passive_chart.at(j,j+1) << Item.new(r)
+ passive_chart.at(j,j+1).last.span.left = j
+ passive_chart.at(j,j+1).last.span.right = j+1
+ passive_chart.at(j,j+1).last.dot = 1
+ end
+ }
+}
+
+
+
+# pre-fill active chart with non 0arity rules starting with a (single(!)) T
+g.rules.reject{|r| r.rhs[0].class==NonTerminal||r.arity==0}.each {|r|
+ input.each_with_index { |i,j|
+ if i.w==r.rhs.first.w
+ (j+2).upto(input.size) { |k| # no empty NT!
+ active_chart.at(j,k) << Item.new(r)
+ active_chart.at(j,k).last.span.left = j
+ active_chart.at(j,k).last.span.right = k #j+1 # k
+ active_chart.at(j,k).last.dot = 1
+ }
+ end
+ }
+}
+# pre-fill active chart
+s = g.rules.reject { |r| r.rhs.first.class!=NonTerminal}#.reject{|r| r.lhs.sym == 'S'}
+(input.size).times { |k|
+ 0.upto(input.size-(k+2)) { |i|
+ s.each { |r|
+ active_chart.at(i,i+k+2) << Item.new(r)
+ active_chart.at(i,i+k+2).last.span.left = i
+ active_chart.at(i,i+k+2).last.span.right = i+k+2
+ active_chart.at(i,i+k+2).last.dot = 0
+ }
+ }
+}
+
+
+
+# parse
+puts "parse"
+def visit i, j, sz, active_chart, passive_chart, g, input
+ puts "| #{i},#{j}"
+
+ # SCAN
+ active_chart.at(i,j).each { |item|
+ if item.rhs[item.dot].class == Terminal
+ if item.rhs[item.dot].w == input[item.span.left+item.dot].w
+ item.dot += 1
+ if item.dot == item.rhs.size
+ passive_chart.at(i,j) << Item.new(item)
+ #passive_chart.at(i,j).last.span.right = item.span.left+item.dot
+ end
+ end
+ end
+ }
+
+ 1.upto(sz) { |span|
+ break if span==(j-i)
+ i.upto(j-span) { |k|
+ puts " #{k},#{k+span} (##{span})"
+ # COMPLETE
+ active_chart.at(i,j).each { |active_item|
+ passive_chart.at(k, k+span).each { |passive_item|
+ if active_item.rhs[active_item.dot].class==NonTerminal && passive_item.lhs.sym == active_item.rhs[active_item.dot].sym
+ active_item.span.right = passive_item.span.right if passive_item.span.right > active_item.span.right
+ active_item.dot += 1
+ if active_item.dot == active_item.rhs.size
+ passive_chart.at(i,j) << Item.new(active_item)
+ end
+ end
+ }
+ }
+ }
+
+ # SCAN
+ active_chart.at(i,j).each { |item|
+ if item.rhs[item.dot].class == Terminal
+ if item.rhs[item.dot].w == input[item.span.left+item.dot].w
+ item.dot += 1
+ if item.dot == item.rhs.size
+ passive_chart.at(i,j) << Item.new(item)
+ end
+ end
+ end
+ }
+
+ }
+end
+
+(input.size).times { |k|
+ 0.upto(input.size-(k+2)) { |i|
+ visit i, i+k+2, input.size, active_chart, passive_chart, g, input
+ }
+}
+puts "---"
+
+active_chart.at(0,5).each { |item|
+ puts item.to_s
+}
+