From da646adc508083d69dea995ef8692840377fa61e Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Tue, 20 May 2014 14:42:33 +0200 Subject: cleanup --- intersect.rb | 209 ------------------------------------------------------ intersect1.rb | 221 ---------------------------------------------------------- intersect2.rb | 176 ---------------------------------------------- parse.rb | 176 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 606 deletions(-) delete mode 100644 intersect.rb delete mode 100644 intersect1.rb delete mode 100644 intersect2.rb create mode 100644 parse.rb diff --git a/intersect.rb b/intersect.rb deleted file mode 100644 index 64b9696..0000000 --- a/intersect.rb +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env ruby - -require_relative './grammar.rb' -STDOUT.sync = true - - -class Chart - - def initialize n - @m = [] - (n+1).times { - _ = [] - (n+1).times { _ << [] } - @m << _ - } - end - - def at i, j - @m[i][j] - end - - def add rule_or_item, i, j, right, dot=0 - item = Item.new(rule_or_item) - item.span.left = i - item.span.right = right - item.dot = dot - at(i, j) << item - end -end - -class Span - attr_accessor :left, :right - - def initialize left=nil, right=nil - @left = left - @right = right - end -end - -class Item < Rule - attr_accessor :lhs, :rhs, :span, :dot - - def initialize rule_or_item - @lhs = rule_or_item.lhs.dup - @rhs = rule_or_item.rhs.dup - if rule_or_item.class == Item - @span = Span.new rule_or_item.span.left, rule_or_item.span.right - @dot = rule_or_item.dot - else - @span = Span.new - @dot = 0 - end - end - - def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@span.left}, #{@span.right})" - end -end - -def visit n, depth, skip=0 - (depth-skip).times { |i| - i += skip - 0.upto(n-(i+1)) { |j| - yield j, j+i+1 - } - } -end - -def init active_chart, passive_chart, grammar, input, n - # pre-fill passive chart w/ 0-arity rules - #grammar.rules.select { |r| r.rhs.first.class==T }.each { |r| - grammar.rules.select { |r| r.arity==0 }.each { |r| - input.each_index.select { |i| input[i].word==r.rhs.first.word }.each { |j| - k = 1 - if r.rhs.size > 1 - z = r.rhs.index { |i| i.class==NT } - if z - z -= 1 - else - z = r.rhs.size-1 - end - if input[j..j+z].map { |i| i.word } == r.rhs[0..z].map { |i| i.word } - k = z+1 - else - next - end - end - if k == r.rhs.size - passive_chart.add(r, j, j+k, j+k, k) - #else - # (j+k).upto(n) { |l| active_chart.add r, j, l, j+k, k } - end - } - } - # seed active chart - #s = grammar.rules.reject { |r| r.rhs.first.class!=NT } - #visit(n, n, 1) { |i,j| - # s.each { |r| active_chart.add(r, i, j, i) } - #} -end - -def scan item, passive_chart, input - while item.rhs[item.dot].class == T - break if item.span.right > input.size-1 - if item.rhs[item.dot].word == input[item.span.right].word - item.dot += 1 - item.span.right += 1 - break if item.dot == item.rhs.size - else - break - end - end -end - -def parse i, j, n, active_chart, passive_chart, input, grammar - grammar.rules.each { |r| - if r.rhs.first.class==T&&r.rhs.first.word==input[i].word - new_item = Item.new r - scan new_item, passive_chart, input - end - } - #active_chart.at(i,j).each { |active_item| - 1.upto(n) { |span| - break if span==(j-i) - i.upto(j-span) { |k| - STDERR.write " #{k},#{k+span}\n" - <<-DOC - passive_chart.at(k, k+span).each { |passive_item| - if active_item.rhs[active_item.dot].class==NT && passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol - next if not active_item.span.right==passive_item.span.left - new_item = Item.new active_item - new_item.span.right = passive_item.span.right - new_item.dot += 1 - scan new_item, passive_chart, input - if new_item.dot == new_item.rhs.size - passive_chart.at(i,j) << new_item if new_item.span.left==i&&new_item.span.right==j - else - # item is dead unless it has an NT to fill - active_chart.at(i,j) << new_item if new_item.span.left==i&&new_item.rhs[new_item.dot].class==NT - end - end - } - DOC - } - } - #} - # self-filling - to_add_active = [] - to_add_passive = [] - passive_chart.at(i,j).each { |passive_item| - active_chart.at(i,j).each { |active_item| - next if active_item.rhs[active_item.dot].class!=NT - if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol - next if not active_item.span.right==passive_item.span.left - new_item = Item.new active_item - new_item.span.right = passive_item.span.right - new_item.dot += 1 - scan new_item, passive_chart, input - if new_item.dot == new_item.rhs.size - to_add_passive << new_item if new_item.span.left==i&&new_item.span.right==j - else - to_add_active << new_item - end - end - } - } - to_add_active.each { |item| active_chart.at(i,j) << item } - to_add_passive.each { |item| passive_chart.at(i,j) << item } -end - -def preprocess s - s.split.map { |i| T.new i } -end - -def main - input = preprocess 'ich sah ein kleines haus' - #input = preprocess 'lebensmittel schuld an europäischer inflation' - #input = preprocess 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .' - n = input.size - - puts 'reading grammar' - g = Grammar.new 'example/grammar' - #g = Grammar.new 'example/grammar.x' - #g = Grammar.new 'example/grammar.3.gz' # 4th segment of newstest2008 - - puts 'adding glue rules' - g.add_glue_rules - - #puts 'adding pass-through rules' - #g.add_pass_through_rules input - - puts 'initializing charts' - passive_chart = Chart.new n - active_chart = Chart.new n - init active_chart, passive_chart, g, input, n - - puts 'parsing' - visit(n, n, 1) { |i,j| - STDERR.write "span (#{i}, #{j})\n" - parse i, j, n, active_chart, passive_chart, input, g - } - - #puts "---\npassive chart" - #visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s if item.span.left==i&&item.span.right==j }; puts } -end - - -main - diff --git a/intersect1.rb b/intersect1.rb deleted file mode 100644 index fd465b0..0000000 --- a/intersect1.rb +++ /dev/null @@ -1,221 +0,0 @@ -#!/usr/bin/env ruby - -require_relative './grammar.rb' -STDOUT.sync = true - - -class Chart - attr_accessor :has - - def initialize n - @m = [] - (n+1).times { - _ = [] - (n+1).times { _ << [] } - @m << _ - } - @has = {} - end - - def at i, j - @m[i][j] - end - - def add rule_or_item, i, j, right, dot=0 - item = Item.new(rule_or_item) - item.span.left = i - item.span.right = right - item.dot = dot - at(i, j) << item - end -end - -class Span - attr_accessor :left, :right - - def initialize left=nil, right=nil - @left = left - @right = right - end -end - -class Item < Rule - attr_accessor :lhs, :rhs, :span, :dot - - def initialize rule_or_item - @lhs = rule_or_item.lhs.dup - @rhs = rule_or_item.rhs.dup - if rule_or_item.class == Item - @span = Span.new rule_or_item.span.left, rule_or_item.span.right - @dot = rule_or_item.dot - else - @span = Span.new - @dot = 0 - end - end - - def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@span.left}, #{@span.right})" - end -end - -def visit n, depth, skip=0 - (depth-skip).times { |i| - i += skip - 0.upto(n-(i+1)) { |j| - yield j, j+i+1 - } - } -end - -def preprocess s - s.split.map { |i| T.new i } -end - -def init input, n, active_chart, passive_chart, grammar - grammar.rules.select { |r| r.arity==0 }.each { |r| - input.each_index { |i| - if input[i, r.rhs.size].map{|x|x.word}==r.rhs.map{|x|x.word} - passive_chart.add Item.new(r), i, i+r.rhs.size, i+r.rhs.size, r.rhs.size - passive_chart.d["#{i},#{i+r.rhs.size},#{r.lhs.symbol}"] = true - end - } - } -end - -def scan item, passive_chart, input - while item.rhs[item.dot].class == T - break if item.span.right > input.size-1 - if item.rhs[item.dot].word == input[item.span.right].word - item.dot += 1 - item.span.right += 1 - break if item.dot == item.rhs.size - else - break - end - end -end - -def parse input, n, active_chart, passive_chart, grammar - rules_starting_with_t = grammar.rules.select { |r| r.arity > 0 && r.rhs.first.class==T } - rules_starting_with_nt = grammar.rules.select { |r| r.rhs.first.class == NT } - - # outer loop - 2.upto(n) { |span| - 0.upto(n-span) { |k| - - # try to apply rules starting with T - rules_starting_with_t.select { |r| r.rhs.first.word == input[k].word }.each { |r| - new_item = Item.new r - new_item.span.left = k - new_item.span.right = k - new_item.dot = 0 - scan new_item, passive_chart, input - active_chart.at(k, k+span) << new_item if new_item.span.right < k+span - } - - # seed active chart - rules_starting_with_nt.each { |r| - # no eps-productions! - next if r.rhs.size > span - #puts "#{r.rhs.size} #{span}" - new_item = Item.new r - new_item.span.left = k - new_item.span.right = k - new_item.dot = 0 - active_chart.at(k, k+span) << new_item - } - - puts "#{k} #{k+span}" - - # inner loop - puts "SIZE #{active_chart.at(k,k+span).size}" - while !active_chart.at(k,k+span).empty? - active_item = active_chart.at(k, k+span).pop - - 1.upto(span-1) { |span2| - k.upto((k+span)-span2) { |l| - - #puts "passive chart sz #{passive_chart.at(l, l+span2).size} #{l},#{l+span2}" - #passive_chart.at(l, l+span2).select { |item| item.lhs.symbol==active_item.rhs[active_item.dot].symbol }.each { |passive_item| - # if l==0&&span2==2 - # puts " #{passive_item}" - # end - if passive_chart.d["#{l},#{l+span2},#{active_item.rhs[active_item.dot].symbol}"] - if l == active_item.span.right - new_item = Item.new active_item - new_item.span.left = active_item.span.left - new_item.span.right = l+span2 - new_item.dot += 1 - scan new_item, passive_chart, input - if new_item.dot == new_item.rhs.size - if new_item.span.left==k&&new_item.span.right==(k+span) - #puts "NP@#{k},#{k+span} #{new_item}" - passive_chart.at(k,k+span) << new_item - end - else - if new_item.rhs[new_item.dot].class==NT && new_item.span.right < (k+span) - #puts "NA@#{k},#{k+span} #{new_item}" - active_chart.at(k,k+span) << new_item - end - end - end - #} - end - -# # try to advance items -# passive_chart.at(l, l+span2).each { |passive_item| -# active_chart.at(k, k+span).select { |active_item| active_item.rhs[active_item.dot].symbol == passive_item.lhs.symbol }.each { |active_item| -# if passive_item.span.left == active_item.span.right -# new_item = Item.new active_item -# new_item.span.left = active_item.span.left -# new_item.span.right = passive_item.span.right -# new_item.dot += 1 -# scan new_item, passive_chart, input -# if new_item.dot == new_item.rhs.size -# if new_item.span.left==k&&new_item.span.right==(k+span) -# puts "NP@#{k},#{k+span} #{new_item}" -# passive_chart.at(k,k+span) << new_item -# end -# else -# # item is dead unless it has an NT to fill -# if new_item.rhs[new_item.dot].class==NT && new_item.span.right < (k+span) -# puts "NA@#{k},#{k+span} #{new_item}" -# active_chart.at(k,k+span) << new_item -# end -# end -# end -# } -# } -# - - } - } - - end - - } - } -end - -def main - #input = preprocess 'ich sah ein kleines haus' - #input = preprocess 'lebensmittel schuld an europäischer inflation' - input = preprocess 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .' - n = input.size - g = Grammar.new 'example/grammar.3.gz' - #g.add_glue_rules - passive_chart = Chart.new n - active_chart = Chart.new n - init input, n, active_chart, passive_chart, g - parse input, n, active_chart, passive_chart, g - #puts "---\npassive chart" - #visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s if item.span.left==i&&item.span.right==j }; puts } - #puts "\nactive chart" - #visit(n, n, 0) { |i,j| puts "#{i},#{j}"; active_chart.at(i,j).each { |item| puts ' '+item.to_s }; puts } - -end - - -main - diff --git a/intersect2.rb b/intersect2.rb deleted file mode 100644 index 0d82c1d..0000000 --- a/intersect2.rb +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env ruby - -require_relative './grammar.rb' -STDOUT.sync = true - - -class Chart - - def initialize n - @m = [] - (n+1).times { - _ = [] - (n+1).times { _ << [] } - @m << _ - } - @b = {} - end - - def at i, j - @m[i][j] - end - - def add i, j, item - at(i,j) << item - @b["#{i},#{j},#{item.lhs.symbol}"] = true - end - - def has symbol, i, j - return @b["#{i},#{j},#{symbol}"] - end -end - -class Span - attr_accessor :left, :right - - def initialize left=nil, right=nil - @left = left - @right = right - end -end - -class Item < Rule - attr_accessor :lhs, :rhs, :span, :dot - - def initialize rule_or_item, left, right, dot - @lhs = rule_or_item.lhs - @rhs = rule_or_item.rhs.dup - @span = Span.new left, right - @dot = dot - end - - def to_s - "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@span.left}, #{@span.right})" - end -end - -def init input, n, active_chart, passive_chart, grammar - grammar.flat.each { |r| - input.each_index { |i| - if input[i, r.rhs.size] == r.rhs.map { |x| x.word } - new_item = Item.new r, i, i+r.rhs.size, r.rhs.size - passive_chart.add i, i+r.rhs.size, new_item - end - } - } -end - -def scan item, input, limit, passive_chart - while item.rhs[item.dot].class == T && item.span.right < limit - if item.rhs[item.dot].word == input[item.span.right] - item.dot += 1 - item.span.right += 1 - else - break - end - end -end - -def parse input, n, active_chart, passive_chart, grammar - 2.upto(n) { |span| # outer loop - 0.upto(n-span) { |k| - - puts " span(#{k},#{k+span})" - - # try to apply rules starting with T - grammar.mixed.select { |r| r.rhs.first.word == input[k] }.each { |r| - new_item = Item.new r, k, k, 0 - scan new_item, input, k+span, passive_chart - active_chart.at(k,k+span) << new_item - } - - # seed active chart - grammar.rewrite.each { |r| - next if r.rhs.size > span - active_chart.at(k,k+span) << Item.new(r, k, k, 0) - } - - active_chart.at(k,k+span).each { |active_item| - next if active_item.rhs[active_item.dot].class==T - # inner loop - 1.upto(span-1) { |span2| - k.upto((k+span)-span2) { |l| - - if passive_chart.has active_item.rhs[active_item.dot].symbol, l, l+span2 - if l == active_item.span.right - new_item = Item.new active_item, active_item.span.left, l+span2, active_item.dot+1 - scan new_item, input, k+span, passive_chart - if new_item.dot == new_item.rhs.size # done with item - if new_item.span.left == k && new_item.span.right == k+span - passive_chart.add k, k+span, new_item - end - else - if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span - active_chart.at(k,k+span) << new_item - end - end - end - end - } - } - } - - # 'self-filling' step - passive_chart.at(k,k+span).each { |passive_item| - active_chart.at(k,k+span).each { |active_item| - next if active_item.rhs[active_item.dot].class!=NT - if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol - next if not active_item.span.right==passive_item.span.left - new_item = Item.new active_item, active_item.span.left, passive_item.span.right, active_item.dot+1 - scan new_item, input, k+span, passive_chart - if new_item.dot == new_item.rhs.size - if new_item.span.left == k && new_item.span.right == k+span - passive_chart.add k, k+span, new_item - else - puts "#{new_item}" # FIXME never happens - end - else - if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span - puts "NA@#{k},#{k+span} #{new_item}" - active_chart.at(k,k+span) << new_item - end - end - end - } - } - } - } -end - -def visit n, depth, skip=0 # FIXME - (depth-skip).times { |i| - i += skip - 0.upto(n-(i+1)) { |j| - yield j, j+i+1 - } - } -end - -def main - #input = 'ich sah ein kleines haus'.split - input = 'lebensmittel schuld an europäischer inflation'.split - #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split - n = input.size - grammar = Grammar.new 'example/grammar.x' - grammar.add_glue_rules - passive_chart = Chart.new n - active_chart = Chart.new n - init input, n, active_chart, passive_chart, grammar - parse input, n, active_chart, passive_chart, grammar - puts "---\npassive chart" - visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s }; puts } -end - - -main - diff --git a/parse.rb b/parse.rb new file mode 100644 index 0000000..0d82c1d --- /dev/null +++ b/parse.rb @@ -0,0 +1,176 @@ +#!/usr/bin/env ruby + +require_relative './grammar.rb' +STDOUT.sync = true + + +class Chart + + def initialize n + @m = [] + (n+1).times { + _ = [] + (n+1).times { _ << [] } + @m << _ + } + @b = {} + end + + def at i, j + @m[i][j] + end + + def add i, j, item + at(i,j) << item + @b["#{i},#{j},#{item.lhs.symbol}"] = true + end + + def has symbol, i, j + return @b["#{i},#{j},#{symbol}"] + end +end + +class Span + attr_accessor :left, :right + + def initialize left=nil, right=nil + @left = left + @right = right + end +end + +class Item < Rule + attr_accessor :lhs, :rhs, :span, :dot + + def initialize rule_or_item, left, right, dot + @lhs = rule_or_item.lhs + @rhs = rule_or_item.rhs.dup + @span = Span.new left, right + @dot = dot + end + + def to_s + "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@span.left}, #{@span.right})" + end +end + +def init input, n, active_chart, passive_chart, grammar + grammar.flat.each { |r| + input.each_index { |i| + if input[i, r.rhs.size] == r.rhs.map { |x| x.word } + new_item = Item.new r, i, i+r.rhs.size, r.rhs.size + passive_chart.add i, i+r.rhs.size, new_item + end + } + } +end + +def scan item, input, limit, passive_chart + while item.rhs[item.dot].class == T && item.span.right < limit + if item.rhs[item.dot].word == input[item.span.right] + item.dot += 1 + item.span.right += 1 + else + break + end + end +end + +def parse input, n, active_chart, passive_chart, grammar + 2.upto(n) { |span| # outer loop + 0.upto(n-span) { |k| + + puts " span(#{k},#{k+span})" + + # try to apply rules starting with T + grammar.mixed.select { |r| r.rhs.first.word == input[k] }.each { |r| + new_item = Item.new r, k, k, 0 + scan new_item, input, k+span, passive_chart + active_chart.at(k,k+span) << new_item + } + + # seed active chart + grammar.rewrite.each { |r| + next if r.rhs.size > span + active_chart.at(k,k+span) << Item.new(r, k, k, 0) + } + + active_chart.at(k,k+span).each { |active_item| + next if active_item.rhs[active_item.dot].class==T + # inner loop + 1.upto(span-1) { |span2| + k.upto((k+span)-span2) { |l| + + if passive_chart.has active_item.rhs[active_item.dot].symbol, l, l+span2 + if l == active_item.span.right + new_item = Item.new active_item, active_item.span.left, l+span2, active_item.dot+1 + scan new_item, input, k+span, passive_chart + if new_item.dot == new_item.rhs.size # done with item + if new_item.span.left == k && new_item.span.right == k+span + passive_chart.add k, k+span, new_item + end + else + if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span + active_chart.at(k,k+span) << new_item + end + end + end + end + } + } + } + + # 'self-filling' step + passive_chart.at(k,k+span).each { |passive_item| + active_chart.at(k,k+span).each { |active_item| + next if active_item.rhs[active_item.dot].class!=NT + if passive_item.lhs.symbol == active_item.rhs[active_item.dot].symbol + next if not active_item.span.right==passive_item.span.left + new_item = Item.new active_item, active_item.span.left, passive_item.span.right, active_item.dot+1 + scan new_item, input, k+span, passive_chart + if new_item.dot == new_item.rhs.size + if new_item.span.left == k && new_item.span.right == k+span + passive_chart.add k, k+span, new_item + else + puts "#{new_item}" # FIXME never happens + end + else + if new_item.rhs[new_item.dot].class == NT && new_item.span.right+(new_item.rhs.size-(new_item.dot)) <= k+span + puts "NA@#{k},#{k+span} #{new_item}" + active_chart.at(k,k+span) << new_item + end + end + end + } + } + } + } +end + +def visit n, depth, skip=0 # FIXME + (depth-skip).times { |i| + i += skip + 0.upto(n-(i+1)) { |j| + yield j, j+i+1 + } + } +end + +def main + #input = 'ich sah ein kleines haus'.split + input = 'lebensmittel schuld an europäischer inflation'.split + #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split + n = input.size + grammar = Grammar.new 'example/grammar.x' + grammar.add_glue_rules + passive_chart = Chart.new n + active_chart = Chart.new n + init input, n, active_chart, passive_chart, grammar + parse input, n, active_chart, passive_chart, grammar + puts "---\npassive chart" + visit(n, n, 0) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts ' '+item.to_s }; puts } +end + + +main + -- cgit v1.2.3