diff options
author | Patrick Simianer <p@simianer.de> | 2014-06-04 20:26:06 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-06-04 20:26:06 +0200 |
commit | a10db22ce00bd004682a00322b4d177b694082b7 (patch) | |
tree | 1a97ab371137cd459418f12db00237f0039fada1 | |
parent | dbc15de7a63b939d7c3c51c39b34286aed56739f (diff) |
refactoring, e.g. edges have scores, not weights
-rw-r--r-- | example/json/test.json | 42 | ||||
-rw-r--r-- | grammar.rb | 24 | ||||
-rw-r--r-- | hg.rb | 23 | ||||
-rwxr-xr-x | test.rb | 4 |
4 files changed, 46 insertions, 47 deletions
diff --git a/example/json/test.json b/example/json/test.json index bc422c6..aa0b45e 100644 --- a/example/json/test.json +++ b/example/json/test.json @@ -4,30 +4,30 @@ }, "nodes": [ -{ "id":-1, "cat":"root" }, -{ "id":0, "cat":"NP" }, -{ "id":1, "cat":"V" }, -{ "id":2, "cat":"JJ" }, -{ "id":3, "cat":"NN" }, -{ "id":4, "cat":"NP" }, -{ "id":5, "cat":"VP" }, -{ "id":6, "cat":"S" }, -{ "id":7, "cat":"Goal" } +{ "id":-1 }, +{ "id":0 }, +{ "id":1 }, +{ "id":2 }, +{ "id":3 }, +{ "id":4 }, +{ "id":5 }, +{ "id":6 }, +{ "id":7 } ], "edges": [ -{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} }, -{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} }, -{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} }, -{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} }, -{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} }, -{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} }, -{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} }, -{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} }, -{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 }, -{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} }, -{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} }, -{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{}, "weight":1.0 } +{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} }, +{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} }, +{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} }, +{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} }, +{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0} }, +{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} }, +{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} }, +{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{} } ] } @@ -25,9 +25,11 @@ class NT def from_s s s.delete! '[]' @symbol, meta = s.split '@' - span, index = meta.split ',' - @left, @right = span.split(':').map { |x| x.to_i } - @index = index.to_i if index + if meta + span, index = meta.split ',' + @left, @right = span.split(':').map { |x| x.to_i } + @index = index.to_i + end end def self.from_s s @@ -44,11 +46,9 @@ end class Rule attr_accessor :lhs, :rhs, :target, :map - def initialize lhs=nil, rhs=[], left=nil, right=nil, target=[] + def initialize lhs=nil, rhs=[], target=[] @lhs = lhs @rhs = rhs - @lhs.left = left if lhs - @lhs.right = right if lhs @target = target @arity_ = nil end @@ -59,20 +59,20 @@ class Rule def arity return @arity_ if @arity_ - return rhs.select { |i| i.class == NT }.size + rhs.select { |i| i.class == NT }.size end def read_right_ s - a = [] + _ = [] s.split.each { |x| x.strip! if x[0]=='[' && x[x.size-1] == ']' - a << NT.from_s(x) + _ << NT.from_s(x) else - a << T.new(x) + _ << T.new(x) end } - return a + return _ end def from_s s @@ -93,7 +93,7 @@ class Grammar attr_accessor :rules, :startn, :startt, :flat def initialize fn - @rules = []; @startn = []; @startt = [] ;@flat = [] + @rules = []; @startn = []; @startt = []; @flat = [] ReadFile.readlines_strip(fn).each_with_index { |s,i| STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0 @rules << Rule.from_s(s) @@ -13,7 +13,6 @@ class HG::Node def initialize id=nil, cat=nil, outgoing=[], incoming=[], score=nil @id = id - @cat = cat @outgoing = outgoing @incoming = incoming @score = nil @@ -46,12 +45,12 @@ class HG::Hypergraph end class HG::Hyperedge - attr_accessor :head, :tails, :weight, :f, :mark, :rule + attr_accessor :head, :tails, :score, :f, :mark, :rule - def initialize head=nil, tails=[], weight=0.0, f=SparseVector.new, rule=nil + def initialize head=nil, tails=[], score=0.0, f=SparseVector.new, rule=nil @head = head @tails = tails - @weight = weight + @score = score @f = f @mark = 0 @rule = Grammar::Rule.from_s rule if rule @@ -66,7 +65,7 @@ class HG::Hyperedge end def to_s - "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>" + "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, score:#{@score}, f:#{f.to_s}, mark:#{@mark}>" end end @@ -98,7 +97,7 @@ def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new e.tails.each { |m| s = semiring.multiply.call(s, m.score) } - n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) + n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score)) } } end @@ -114,10 +113,10 @@ def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new e.tails.each { |m| s = semiring.multiply.call(s, m.score) } - if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add + if n.score < semiring.multiply.call(s, e.score) # ViterbiSemiring add best_edge = e end - n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) + n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score)) } best_path << best_edge if best_edge } @@ -135,10 +134,10 @@ def HG::viterbi_string hypergraph, root, semiring=ViterbiSemiring.new e.tails.each { |m| s = semiring.multiply.call(s, m.score) } - if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add + if n.score < semiring.multiply.call(s, e.score) # ViterbiSemiring add best_s = e.e end - n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight)) + n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score)) } s += best_s if best_s } @@ -169,14 +168,14 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal h = JSON.parse File.new(fn).read w = SparseVector.from_h h['weights'] h['nodes'].each { |x| - n = Node.new x['id'], x['cat'] + n = Node.new x['id'] nodes << n nodes_by_id[n.id] = n } h['edges'].each { |x| e = Hyperedge.new(nodes_by_id[x['head']], \ x['tails'].map { |j| nodes_by_id[j] }.to_a, \ - (x['weight'] ? semiring.convert.call(x['weight'].to_f) : nil), \ + (x['score'] ? semiring.convert.call(x['score'].to_f) : nil), \ (x['f'] ? SparseVector.from_h(x['f']) : nil), \ x['rule']) if x['f'] @@ -5,7 +5,7 @@ require_relative 'hg' semiring = ViterbiSemiring.new hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/json/test.json', semiring, true) -path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring +#path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring #s = HG::derive path, path.last.rule.lhs, [] #puts "#{s.map { |i| i.word }.join ' '} ||| #{score}" @@ -13,6 +13,6 @@ hypergraph.reset paths = HG::all_paths hypergraph, nodes_by_id[-1] paths.each { |p| s = HG::derive p, p.last.rule.lhs, [] -puts "#{s.map { |i| i.word }.join ' '} ||| #{score}" + puts "#{s.map { |i| i.word }.join ' '}" } |