summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-04 20:06:53 +0200
committerPatrick Simianer <p@simianer.de>2014-06-04 20:06:53 +0200
commitdbc15de7a63b939d7c3c51c39b34286aed56739f (patch)
treeba8a8150aa9b95bf18064dd4e3b728fed44b7614
parent6f1ed883692e7f9e082cadb92673e1eeed0cdc27 (diff)
fix all_paths, better json handling
-rw-r--r--example/json/test.json42
-rw-r--r--hg.rb76
-rwxr-xr-xtest.rb16
3 files changed, 72 insertions, 62 deletions
diff --git a/example/json/test.json b/example/json/test.json
index ad43ae4..bc422c6 100644
--- a/example/json/test.json
+++ b/example/json/test.json
@@ -4,30 +4,30 @@
},
"nodes":
[
-{ "label":"root", "cat":"root" },
-{ "label":"0", "cat":"NP", "left":0, "right":1 },
-{ "label":"1", "cat":"V", "left":1, "right":2 },
-{ "label":"2", "cat":"JJ", "left":3, "right":4 },
-{ "label":"3", "cat":"NN", "left":3, "right":5 },
-{ "label":"4", "cat":"NP", "left":2, "right":5 },
-{ "label":"5", "cat":"VP", "left":1, "right":5 },
-{ "label":"6", "cat":"S", "left":0, "right":5 },
-{ "label":"7", "cat":"Goal", "left":0, "right":5 }
+{ "id":-1, "cat":"root" },
+{ "id":0, "cat":"NP" },
+{ "id":1, "cat":"V" },
+{ "id":2, "cat":"JJ" },
+{ "id":3, "cat":"NN" },
+{ "id":4, "cat":"NP" },
+{ "id":5, "cat":"VP" },
+{ "id":6, "cat":"S" },
+{ "id":7, "cat":"Goal" }
],
"edges":
[
-{"head":"0", "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ "root" ], "f":{"logp":-0.5, "use_i":1.0}, "weight":0.367879441171 },
-{"head":"1", "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ "root" ], "f":{"logp":-0.25, "use_saw":1.0}, "weight":0.606530659713 },
-{"head":"2", "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_small":1.0}, "weight":1.0 },
-{"head":"2", "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_little":1.0}, "weight":1.0 },
-{"head":"3", "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
-{"head":"3", "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ "root" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
-{"head":"3", "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ "2" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
-{"head":"3", "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ "2" ], "f":{"logp":0.0, "use_shell":1.0}, "weight":2.71828182846 },
-{"head":"4", "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ "3" ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 },
-{"head":"5", "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ "1","4" ], "f":{"logp":0.0}, "weight":1.0 },
-{"head":"6", "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ "0","5" ], "f":{"logp":0.0}, "weight":1.0 },
-{"head":"7", "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ "6" ], "f":{}, "weight":1.0 }
+{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} },
+{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} },
+{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} },
+{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} },
+{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 },
+{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} },
+{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} },
+{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{}, "weight":1.0 }
]
}
diff --git a/hg.rb b/hg.rb
index c7ee6ba..455a22b 100644
--- a/hg.rb
+++ b/hg.rb
@@ -9,10 +9,10 @@ module HG
class HG::Node
- attr_accessor :label, :cat, :outgoing, :incoming, :score
+ attr_accessor :id, :cat, :outgoing, :incoming, :score
- def initialize label=nil, cat=nil, outgoing=[], incoming=[], score=nil
- @label = label
+ def initialize id=nil, cat=nil, outgoing=[], incoming=[], score=nil
+ @id = id
@cat = cat
@outgoing = outgoing
@incoming = incoming
@@ -20,7 +20,7 @@ class HG::Node
end
def to_s
- "Node<label:\"#{@label}\", cat:\"#{@cat}\", outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>"
+ "Node<id:#{@id}, cat:\"#{@cat}\", outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>"
end
end
@@ -36,6 +36,10 @@ class HG::Hypergraph
@edges.map { |e| e.arity }.max
end
+ def reset
+ @edges.each { |e| e.mark = 0 }
+ end
+
def to_s
"Hypergraph<nodes:[#{@nodes.to_s}], edges:[#{@edges.to_s}], arity:#{arity}>"
end
@@ -62,7 +66,7 @@ class HG::Hyperedge
end
def to_s
- "Hyperedge<head:\"#{@head.label}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.label}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>"
+ "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>"
end
end
@@ -141,29 +145,46 @@ def HG::viterbi_string hypergraph, root, semiring=ViterbiSemiring.new
return s, toposorted.last.score
end
+def HG::all_paths hypergraph, root
+ toposorted = topological_sort hypergraph.nodes
+ paths = [[]]
+ toposorted.each { |n|
+ next if n.incoming.empty?
+ new_paths = []
+ while !paths.empty?
+ p = paths.pop
+ n.incoming.each { |e|
+ new_paths << p+[e]
+ }
+ end
+ paths = new_paths
+ }
+ return paths
+end
+
def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
nodes = []
edges = []
- nodes_by_label = {}
- nodes_by_index = []
+ nodes_by_id = {}
h = JSON.parse File.new(fn).read
w = SparseVector.from_h h['weights']
h['nodes'].each { |x|
- n = Node.new x['label'], x['cat']
+ n = Node.new x['id'], x['cat']
nodes << n
- nodes_by_label[n.label] = n
- nodes_by_index << n
+ nodes_by_id[n.id] = n
}
h['edges'].each { |x|
- e = Hyperedge.new(nodes_by_label[x['head']], \
- x['tails'].map { |j| nodes_by_label[j] }.to_a, \
- semiring.convert.call(x['weight'].to_f), \
- SparseVector.from_h(x['f']), \
+ e = Hyperedge.new(nodes_by_id[x['head']], \
+ x['tails'].map { |j| nodes_by_id[j] }.to_a, \
+ (x['weight'] ? semiring.convert.call(x['weight'].to_f) : nil), \
+ (x['f'] ? SparseVector.from_h(x['f']) : nil), \
x['rule'])
- if log_weights
- e.weight = Math.exp(w.dot(e.f))
- else
- e.weight = w.dot(e.f)
+ if x['f']
+ if log_weights
+ e.weight = Math.exp(w.dot(e.f))
+ else
+ e.weight = w.dot(e.f)
+ end
end
e.tails.each { |m|
m.outgoing << e
@@ -171,24 +192,7 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
e.head.incoming << e
edges << e
}
- return Hypergraph.new(nodes, edges), nodes_by_label, nodes_by_index
-end
-
-def HG::all_paths hypergraph, root, semiring=ViterbiSemiring.new #FIXME?
- toposorted = topological_sort hypergraph.nodes
- paths = [[]]
- toposorted.each { |n|
- next if n.incoming.empty?
- new_paths = []
- while !paths.empty?
- p = paths.pop
- n.incoming.each { |e|
- new_paths << p+[e]
- }
- end
- paths = new_paths
- }
- return paths
+ return Hypergraph.new(nodes, edges), nodes_by_id
end
def HG::derive path, cur, carry
diff --git a/test.rb b/test.rb
index a1b07a8..bc2ed30 100755
--- a/test.rb
+++ b/test.rb
@@ -3,10 +3,16 @@
require_relative 'hg'
-
semiring = ViterbiSemiring.new
-hypergraph, nodes_by_label, _ = HG::read_hypergraph_from_json('example/json/test.json', semiring, true)
-path, _ = HG::viterbi_path hypergraph, nodes_by_label['root'], semiring
-s = HG::derive path, path.last.rule.lhs, []
-puts s.map { |i| i.word }.join ' '
+hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/json/test.json', semiring, true)
+path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
+#s = HG::derive path, path.last.rule.lhs, []
+#puts "#{s.map { |i| i.word }.join ' '} ||| #{score}"
+
+hypergraph.reset
+paths = HG::all_paths hypergraph, nodes_by_id[-1]
+paths.each { |p|
+ s = HG::derive p, p.last.rule.lhs, []
+puts "#{s.map { |i| i.word }.join ' '} ||| #{score}"
+}