diff options
author | Patrick Simianer <p@simianer.de> | 2014-05-02 17:59:32 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-05-02 17:59:32 +0200 |
commit | 1503ee164c9f7fc84388f38c014747067a339f36 (patch) | |
tree | 4759b139e4346c46a503a00c7443b2af0feaf653 | |
parent | 2ff2b2a3446f0b1583b77bc5dd926473f95199f6 (diff) |
first more or less functional version
-rw-r--r-- | grammar | 9 | ||||
-rw-r--r-- | grammar.rb | 96 | ||||
-rw-r--r-- | hg.rb | 5 | ||||
-rw-r--r-- | intersect.rb | 174 |
4 files changed, 284 insertions, 0 deletions
@@ -0,0 +1,9 @@ +[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 +[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0 +[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0 +[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 +[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 +[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0 +[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0 +[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 +[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0 diff --git a/grammar.rb b/grammar.rb new file mode 100644 index 0000000..02f4270 --- /dev/null +++ b/grammar.rb @@ -0,0 +1,96 @@ +require 'nlp_ruby' + +class Terminal + attr_accessor :w + + def initialize s + @w = s + end + + def to_s + "T<#{@w}>" + end +end + +class NonTerminal + attr_accessor :sym, :n + + def initialize s, n=1 + @sym = s + @n = n + end + + def to_s + "NT<#{sym}>" + end +end + +class Span + attr_accessor :left, :right + + def initialize left=-1, right=-1 + @left = left + @right = right + end +end + +class Rule + attr_accessor :lhs, :rhs, :span + + def initialize lhs=nil, rhs=nil, span=Span.new + @lhs = '' + @rhs = [] + @span = span + end + + def to_s + "#{lhs} -> #{rhs.map{|i|i.to_s}.join ' '} (#{arity}) (#{@span.left}, #{@span.right})" + end + + def arity + rhs.reject { |i| i.class == Terminal }.size + end + + def from_s s + a = splitpipe s, 3 + @lhs = NonTerminal.new a[0].strip.gsub!(/(\[|\])/, "") + a[1].split.each { |i| + i.strip! + if i[0]=='[' && i[i.size-1] == ']' + @rhs << NonTerminal.new(i.gsub!(/(\[|\])/, "").split(',')[0]) + else + @rhs << Terminal.new(i) + end + } + @span = Span.new + end + + def self.from_s s + r = self.new + r.from_s s + return r + end +end + +class Grammar + attr_accessor :rules + + def initialize fn + @rules = [] + l = ReadFile.readlines_strip fn + l.each { |i| + @rules << Rule.from_s(i) + } + end + + def to_s + s = '' + @rules.each { |r| s += r.to_s+"\n" } + end +end + + + + + + @@ -0,0 +1,5 @@ +require nlp_ruby + +def s2lattice s="ich sah ein kleines haus" + tokenize s +end diff --git a/intersect.rb b/intersect.rb new file mode 100644 index 0000000..8a583da --- /dev/null +++ b/intersect.rb @@ -0,0 +1,174 @@ +#!/usr/bin/env ruby + +require_relative './grammar.rb' + +# interescts a WFSA (WFST) with a WSCFG to produce a hypergraph + + + +class Chart + attr_accessor :m + + def initialize input + @m = [] + (input.size+1).times { + _ = [] + (input.size+1).times { _ << [] } + @m << _ + } + end + + def at span + return @m[span.left][span.right] + end + + def at i,j + return @m[i][j] + end +end + +class Item < Rule + attr_accessor :lhs, :rhs, :span, :dot + + def initialize rule, dot=-1 + @lhs = rule.lhs.dup + @rhs = rule.rhs.dup + @span = Span.new rule.span.left, rule.span.right + @dot = dot + if rule.class==Item + @dot = rule.dot + end + end + + def to_s + "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} (dot:#{@dot}) (a:#{arity}) (#{@span.left}, #{@span.right})" + end +end + + + +#def shift +#end +#def reduce +#end +#def init + #axioms,goals etc +#end +#def parse +#end + + +g = Grammar.new 'grammar' +input = "ich sah ein kleines haus".split.map { |i| Terminal.new i } +#start = NonTerminal.new 'S' + +passive_chart = Chart.new input +active_chart = Chart.new input + + + +# 1st row: arity0 rules +unary_rules = g.rules.reject { |r| r.arity>0 } +input.each_with_index { |t,j| + unary_rules.each { |r| + if r.rhs.first.w == t.w + passive_chart.at(j,j+1) << Item.new(r) + passive_chart.at(j,j+1).last.span.left = j + passive_chart.at(j,j+1).last.span.right = j+1 + passive_chart.at(j,j+1).last.dot = 1 + end + } +} + + + +# pre-fill active chart with non 0arity rules starting with a (single(!)) T +g.rules.reject{|r| r.rhs[0].class==NonTerminal||r.arity==0}.each {|r| + input.each_with_index { |i,j| + if i.w==r.rhs.first.w + (j+2).upto(input.size) { |k| # no empty NT! + active_chart.at(j,k) << Item.new(r) + active_chart.at(j,k).last.span.left = j + active_chart.at(j,k).last.span.right = k #j+1 # k + active_chart.at(j,k).last.dot = 1 + } + end + } +} +# pre-fill active chart +s = g.rules.reject { |r| r.rhs.first.class!=NonTerminal}#.reject{|r| r.lhs.sym == 'S'} +(input.size).times { |k| + 0.upto(input.size-(k+2)) { |i| + s.each { |r| + active_chart.at(i,i+k+2) << Item.new(r) + active_chart.at(i,i+k+2).last.span.left = i + active_chart.at(i,i+k+2).last.span.right = i+k+2 + active_chart.at(i,i+k+2).last.dot = 0 + } + } +} + + + +# parse +puts "parse" +def visit i, j, sz, active_chart, passive_chart, g, input + puts "| #{i},#{j}" + + # SCAN + active_chart.at(i,j).each { |item| + if item.rhs[item.dot].class == Terminal + if item.rhs[item.dot].w == input[item.span.left+item.dot].w + item.dot += 1 + if item.dot == item.rhs.size + passive_chart.at(i,j) << Item.new(item) + #passive_chart.at(i,j).last.span.right = item.span.left+item.dot + end + end + end + } + + 1.upto(sz) { |span| + break if span==(j-i) + i.upto(j-span) { |k| + puts " #{k},#{k+span} (##{span})" + # COMPLETE + active_chart.at(i,j).each { |active_item| + passive_chart.at(k, k+span).each { |passive_item| + if active_item.rhs[active_item.dot].class==NonTerminal && passive_item.lhs.sym == active_item.rhs[active_item.dot].sym + active_item.span.right = passive_item.span.right if passive_item.span.right > active_item.span.right + active_item.dot += 1 + if active_item.dot == active_item.rhs.size + passive_chart.at(i,j) << Item.new(active_item) + end + end + } + } + } + + # SCAN + active_chart.at(i,j).each { |item| + if item.rhs[item.dot].class == Terminal + if item.rhs[item.dot].w == input[item.span.left+item.dot].w + item.dot += 1 + if item.dot == item.rhs.size + passive_chart.at(i,j) << Item.new(item) + end + end + end + } + + } +end + +(input.size).times { |k| + 0.upto(input.size-(k+2)) { |i| + visit i, i+k+2, input.size, active_chart, passive_chart, g, input + } +} +puts "---" + +active_chart.at(0,5).each { |item| + puts item.to_s +} + |