From 1651cfb83e0d95f9ada0b9acae6ede84e605720b Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Thu, 12 Jun 2014 16:25:33 +0200 Subject: save some memory --- README.md | 7 ++++-- grammar.rb | 32 +++++++++++++-------------- hg.rb | 4 ++++ parse.rb | 69 +++++++++++++++++++++++++++++++---------------------------- test_hg.rb | 7 +++--- test_parse.rb | 2 +- 6 files changed, 64 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index edf89bd..fa50235 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,9 @@ helpful stuff todo ==== - * animate parsing? - * integrate with HG + * integrate with HG (chart to json) + * kbest + * feature interface + * (global) word ids instead of strings + * animate parsing diff --git a/grammar.rb b/grammar.rb index a8ec07c..6c95de2 100644 --- a/grammar.rb +++ b/grammar.rb @@ -41,11 +41,11 @@ end class Rule attr_accessor :lhs, :rhs, :target, :map, :f - def initialize lhs=nil, rhs=nil, target=nil, map=nil, f=SparseVector.new + def initialize lhs=NT.new, rhs=[], target=[], map=[], f=SparseVector.new @lhs = lhs @rhs = rhs @target = target - @map = (map ? map : []) + @map = map @f = f @arity_ = nil end @@ -59,30 +59,30 @@ class Rule return @arity_ end - def read_right_ s, fill_map=false - _ = [] + def read_right_ s, create_map=false + a = [] s.split.each { |x| x.strip! - if x[0]=='[' && x[x.size-1] == ']' - _ << NT.from_s(x) - @map << _.last.index if fill_map + if x[0] == '[' && x[x.size-1] == ']' + a << NT.from_s(x) + @map << a.last.index if create_map else - _ << T.new(x) + a << T.new(x) end } - return _ + return a end def from_s s lhs, rhs, target, f = splitpipe s, 3 - @lhs = NT.from_s lhs - @rhs = read_right_ rhs + @lhs = NT.from_s lhs + @rhs = read_right_ rhs @target = read_right_ target, true - @f = (f ? SparseVector.from_kv(f) : nil) + @f = (f ? SparseVector.from_kv(f) : nil) end - def self.from_s_x s - r = self.new + def self.from_s s + r = Rule.new r.from_s s return r end @@ -110,9 +110,7 @@ class Grammar end def to_s - s = '' - @rules.each { |r| s += r.to_s+"\n" } - return s + @rules.map { |r| r.to_s }.join "\n" end def add_glue_rules diff --git a/hg.rb b/hg.rb index b73d860..abd8777 100644 --- a/hg.rb +++ b/hg.rb @@ -127,6 +127,10 @@ def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new return best_path, toposorted.last.score end +def HG::k_best hypergraph, root, semiring=nil + #TODO +end + def HG::all_paths hypergraph, root toposorted = topological_sort hypergraph.nodes paths = [[]] diff --git a/parse.rb b/parse.rb index 454bb14..c9cbd7c 100644 --- a/parse.rb +++ b/parse.rb @@ -1,17 +1,23 @@ -#!/usr/bin/env ruby - require 'nlp_ruby' require_relative 'grammar' +def visit i, l, r, x=0 + i.upto(r-x) { |span| + l.upto(r-span) { |k| + yield k, k+span + } + } +end + class Chart def initialize n @m = [] (n+1).times { - _ = [] - (n+1).times { _ << [] } - @m << _ + a = [] + (n+1).times { a << [] } + @m << a } @b = {} end @@ -28,6 +34,10 @@ class Chart def has symbol, i, j return @b["#{i},#{j},#{symbol}"] end + + def to_json + #TODO + end end Span = Struct.new(:left, :right) @@ -36,27 +46,28 @@ class Item < Grammar::Rule attr_accessor :left, :right, :tail_spans, :dot, :f def initialize rule_or_item, left, right, dot - @lhs = Grammar::NT.new(rule_or_item.lhs.symbol, rule_or_item.lhs.index) - @left = left - @right = right - @rhs = [] - @tail_spans = {} - @f = rule_or_item.f - @map = (rule_or_item.map ? rule_or_item.map.dup : []) - rule_or_item.rhs.each_with_index { |x,i| - if x.class == Grammar::T - @rhs << Grammar::T.new(x.word) - end + @lhs = Grammar::NT.new rule_or_item.lhs.symbol, rule_or_item.lhs.index + @left = left + @right = right + @rhs = [] + @tail_spans = {} # refers to source side, use @map + @f = rule_or_item.f + @map = (rule_or_item.map ? rule_or_item.map.dup : []) + rule_or_item.rhs.each_with_index { |x,i| # duplicate rhs partially + @rhs << x if x.class == Grammar::NT - @rhs << Grammar::NT.new(x.symbol, x.index) begin - @tail_spans[i] = rule_or_item.tail_spans[i].dup + if i >= dot + @tail_spans[i] = Span.new(-1, -1) + else + @tail_spans[i] = rule_or_item.tail_spans[i].dup + end rescue @tail_spans[i] = Span.new(-1, -1) end end } - @dot = dot + @dot = dot @target = rule_or_item.target end @@ -88,14 +99,6 @@ def scan item, input, limit, passive_chart return true end -def visit i, l, r, x=0 - i.upto(r-x) { |span| - l.upto(r-span) { |k| - yield k, k+span - } - } -end - def parse input, n, active_chart, passive_chart, grammar visit(1, 0, n) { |i,j| @@ -112,7 +115,7 @@ def parse input, n, active_chart, passive_chart, grammar next if r.rhs.size > j-i active_chart.at(i,j) << Item.new(r, i, i, 0) } - + # parse new_symbols = [] remaining_items = [] @@ -148,11 +151,11 @@ def parse input, n, active_chart, passive_chart, grammar # 'self-filling' step new_symbols.each { |s| - remaining_items.each { |active_item| - next if active_item.dot!=0 - next if active_item.rhs[active_item.dot].class!=Grammar::NT - if active_item.rhs[active_item.dot].symbol == s - new_item = Item.new active_item, i, j, active_item.dot+1 + remaining_items.each { |item| + next if item.dot!=0 + next if item.rhs[item.dot].class!=Grammar::NT + if item.rhs[item.dot].symbol == s + new_item = Item.new item, i, j, item.dot+1 new_item.tail_spans[new_item.dot-1] = Span.new(i,j) if new_item.dot==new_item.rhs.size new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol diff --git a/test_hg.rb b/test_hg.rb index b72bd85..f4e2ef8 100755 --- a/test_hg.rb +++ b/test_hg.rb @@ -4,17 +4,16 @@ require_relative 'hg' semiring = ViterbiSemiring.new -hypergraph, nodes_by_id = HG::read_hypergraph_from_json('x.json', semiring, true) +hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/3/3.json', semiring, true) path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring s = HG::derive path, path.last.head, [] puts "#{s.map { |i| i.word }.join ' '}" puts Math.log score puts - #hypergraph.reset #paths = HG::all_paths hypergraph, nodes_by_id[-1] -#paths.each { |p| +#paths.each_with_index { |p,i| # s = HG::derive p, p.last.head, [] -# puts "#{s.map { |i| i.word }.join ' '}" +# puts "#{i+1}. #{s.map { |i| i.word }.join ' '}" #} diff --git a/test_parse.rb b/test_parse.rb index 18afd55..0187675 100755 --- a/test_parse.rb +++ b/test_parse.rb @@ -50,7 +50,7 @@ def main n = input.size STDERR.write "> reading grammar\n" - grammar = Grammar::Grammar.new 'example/grammars/grammar.3.gz' + grammar = Grammar::Grammar.new 'example/3/grammar.3.gz' STDERR.write ">> adding glue grammar\n" grammar.add_glue_rules STDERR.write ">> adding pass-through grammar\n" -- cgit v1.2.3