diff options
author | Patrick Simianer <p@simianer.de> | 2014-06-04 20:06:53 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-06-04 20:06:53 +0200 |
commit | dbc15de7a63b939d7c3c51c39b34286aed56739f (patch) | |
tree | ba8a8150aa9b95bf18064dd4e3b728fed44b7614 | |
parent | 6f1ed883692e7f9e082cadb92673e1eeed0cdc27 (diff) |
fix all_paths, better json handling
-rw-r--r-- | example/json/test.json | 42 | ||||
-rw-r--r-- | hg.rb | 76 | ||||
-rwxr-xr-x | test.rb | 16 |
3 files changed, 72 insertions, 62 deletions
diff --git a/example/json/test.json b/example/json/test.json index ad43ae4..bc422c6 100644 --- a/example/json/test.json +++ b/example/json/test.json @@ -4,30 +4,30 @@ }, "nodes": [ -{ "label":"root", "cat":"root" }, -{ "label":"0", "cat":"NP", "left":0, "right":1 }, -{ "label":"1", "cat":"V", "left":1, "right":2 }, -{ "label":"2", "cat":"JJ", "left":3, "right":4 }, -{ "label":"3", "cat":"NN", "left":3, "right":5 }, -{ "label":"4", "cat":"NP", "left":2, "right":5 }, -{ "label":"5", "cat":"VP", "left":1, "right":5 }, -{ "label":"6", "cat":"S", "left":0, "right":5 }, -{ "label":"7", "cat":"Goal", "left":0, "right":5 } +{ "id":-1, "cat":"root" }, +{ "id":0, "cat":"NP" }, +{ "id":1, "cat":"V" }, +{ "id":2, "cat":"JJ" }, +{ "id":3, "cat":"NN" }, +{ "id":4, "cat":"NP" }, +{ "id":5, "cat":"VP" }, +{ "id":6, "cat":"S" }, +{ "id":7, "cat":"Goal" } ], "edges": [ -{"head":"0", "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ "root" ], "f":{"logp":-0.5, "use_i":1.0}, "weight":0.367879441171 }, -{"head":"1", "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ "root" ], "f":{"logp":-0.25, "use_saw":1.0}, "weight":0.606530659713 }, -{"head":"2", "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_small":1.0}, "weight":1.0 }, -{"head":"2", "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_little":1.0}, "weight":1.0 }, -{"head":"3", "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 }, -{"head":"3", "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 }, -{"head":"3", "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ "2" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 }, -{"head":"3", "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ "2" ], "f":{"logp":0.0, "use_shell":1.0}, "weight":2.71828182846 }, -{"head":"4", "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ "3" ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 }, -{"head":"5", "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ "1","4" ], "f":{"logp":0.0}, "weight":1.0 }, -{"head":"6", "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ "0","5" ], "f":{"logp":0.0}, "weight":1.0 }, -{"head":"7", "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ "6" ], "f":{}, "weight":1.0 } +{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} }, +{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} }, +{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} }, +{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} }, +{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} }, +{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 }, +{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} }, +{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} }, +{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{}, "weight":1.0 } ] } @@ -9,10 +9,10 @@ module HG class HG::Node - attr_accessor :label, :cat, :outgoing, :incoming, :score + attr_accessor :id, :cat, :outgoing, :incoming, :score - def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil - @label = label + def initialize id=nil, cat=nil, outgoing=[], incoming=[], score=nil + @id = id @cat = cat @outgoing = outgoing @incoming = incoming @@ -20,7 +20,7 @@ class HG::Node end def to_s - "Node<label:\"#{@label}\", cat:\"#{@cat}\", outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>" + "Node<id:#{@id}, cat:\"#{@cat}\", outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>" end end @@ -36,6 +36,10 @@ class HG::Hypergraph @edges.map { |e| e.arity }.max end + def reset + @edges.each { |e| e.mark = 0 } + end + def to_s "Hypergraph<nodes:[#{@nodes.to_s}], edges:[#{@edges.to_s}], arity:#{arity}>" end @@ -62,7 +66,7 @@ class HG::Hyperedge end def to_s - "Hyperedge<head:\"#{@head.label}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.label}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>" + "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>" end end @@ -141,29 +145,46 @@ def HG::viterbi_string hypergraph, root, semiring=ViterbiSemiring.new return s, toposorted.last.score end +def HG::all_paths hypergraph, root + toposorted = topological_sort hypergraph.nodes + paths = [[]] + toposorted.each { |n| + next if n.incoming.empty? + new_paths = [] + while !paths.empty? + p = paths.pop + n.incoming.each { |e| + new_paths << p+[e] + } + end + paths = new_paths + } + return paths +end + def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false nodes = [] edges = [] - nodes_by_label = {} - nodes_by_index = [] + nodes_by_id = {} h = JSON.parse File.new(fn).read w = SparseVector.from_h h['weights'] h['nodes'].each { |x| - n = Node.new x['label'], x['cat'] + n = Node.new x['id'], x['cat'] nodes << n - nodes_by_label[n.label] = n - nodes_by_index << n + nodes_by_id[n.id] = n } h['edges'].each { |x| - e = Hyperedge.new(nodes_by_label[x['head']], \ - x['tails'].map { |j| nodes_by_label[j] }.to_a, \ - semiring.convert.call(x['weight'].to_f), \ - SparseVector.from_h(x['f']), \ + e = Hyperedge.new(nodes_by_id[x['head']], \ + x['tails'].map { |j| nodes_by_id[j] }.to_a, \ + (x['weight'] ? semiring.convert.call(x['weight'].to_f) : nil), \ + (x['f'] ? SparseVector.from_h(x['f']) : nil), \ x['rule']) - if log_weights - e.weight = Math.exp(w.dot(e.f)) - else - e.weight = w.dot(e.f) + if x['f'] + if log_weights + e.weight = Math.exp(w.dot(e.f)) + else + e.weight = w.dot(e.f) + end end e.tails.each { |m| m.outgoing << e @@ -171,24 +192,7 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal e.head.incoming << e edges << e } - return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index -end - -def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new #FIXME? - toposorted = topological_sort hypergraph.nodes - paths = [[]] - toposorted.each { |n| - next if n.incoming.empty? - new_paths = [] - while !paths.empty? - p = paths.pop - n.incoming.each { |e| - new_paths << p+[e] - } - end - paths = new_paths - } - return paths + return Hypergraph.new(nodes, edges), nodes_by_id end def HG::derive path, cur, carry @@ -3,10 +3,16 @@ require_relative 'hg' - semiring = ViterbiSemiring.new -hypergraph, nodes_by_label, _ = HG::read_hypergraph_from_json('example/json/test.json', semiring, true) -path, _ = HG::viterbi_path hypergraph, nodes_by_label['root'], semiring -s = HG::derive path, path.last.rule.lhs, [] -puts s.map { |i| i.word }.join ' ' +hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/json/test.json', semiring, true) +path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring +#s = HG::derive path, path.last.rule.lhs, [] +#puts "#{s.map { |i| i.word }.join ' '} ||| #{score}" + +hypergraph.reset +paths = HG::all_paths hypergraph, nodes_by_id[-1] +paths.each { |p| + s = HG::derive p, p.last.rule.lhs, [] +puts "#{s.map { |i| i.word }.join ' '} ||| #{score}" +} |