summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-04 20:26:06 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-04 20:26:06 +0200
commit a10db22ce00bd004682a00322b4d177b694082b7 (patch)
tree 1a97ab371137cd459418f12db00237f0039fada1
parent dbc15de7a63b939d7c3c51c39b34286aed56739f (diff)
refactoring, e.g. edges have scores, not weights
-rw-r--r-- example/json/test.json 42
-rw-r--r-- grammar.rb 24
-rw-r--r-- hg.rb 23
-rwxr-xr-x test.rb 4
4 files changed, 46 insertions, 47 deletions
diff --git a/example/json/test.json b/example/json/test.json
index bc422c6..aa0b45e 100644
--- a/example/json/test.json
+++ b/example/json/test.json
@@ -4,30 +4,30 @@
},
"nodes":
[
-{ "id":-1, "cat":"root" },
-{ "id":0, "cat":"NP" },
-{ "id":1, "cat":"V" },
-{ "id":2, "cat":"JJ" },
-{ "id":3, "cat":"NN" },
-{ "id":4, "cat":"NP" },
-{ "id":5, "cat":"VP" },
-{ "id":6, "cat":"S" },
-{ "id":7, "cat":"Goal" }
+{ "id":-1 },
+{ "id":0 },
+{ "id":1 },
+{ "id":2 },
+{ "id":3 },
+{ "id":4 },
+{ "id":5 },
+{ "id":6 },
+{ "id":7 }
],
"edges":
[
-{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} },
-{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} },
-{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} },
-{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} },
-{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 },
-{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} },
-{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} },
-{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{}, "weight":1.0 }
+{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} },
+{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} },
+{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} },
+{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} },
+{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} },
+{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0} },
+{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} },
+{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} },
+{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{} }
]
}
diff --git a/grammar.rb b/grammar.rb
index f4ffe90..7bc6c9b 100644
--- a/grammar.rb
+++ b/grammar.rb
@@ -25,9 +25,11 @@ class NT
def from_s s
s.delete! '[]'
@symbol, meta = s.split '@'
- span, index = meta.split ','
- @left, @right = span.split(':').map { |x| x.to_i }
- @index = index.to_i if index
+ if meta
+ span, index = meta.split ','
+ @left, @right = span.split(':').map { |x| x.to_i }
+ @index = index.to_i
+ end
end
def self.from_s s
@@ -44,11 +46,9 @@ end
class Rule
attr_accessor :lhs, :rhs, :target, :map
- def initialize lhs=nil, rhs=[], left=nil, right=nil, target=[]
+ def initialize lhs=nil, rhs=[], target=[]
@lhs = lhs
@rhs = rhs
- @lhs.left = left if lhs
- @lhs.right = right if lhs
@target = target
@arity_ = nil
end
@@ -59,20 +59,20 @@ class Rule
def arity
return @arity_ if @arity_
- return rhs.select { |i| i.class == NT }.size
+ rhs.select { |i| i.class == NT }.size
end
def read_right_ s
- a = []
+ _ = []
s.split.each { |x|
x.strip!
if x[0]=='[' && x[x.size-1] == ']'
- a << NT.from_s(x)
+ _ << NT.from_s(x)
else
- a << T.new(x)
+ _ << T.new(x)
end
}
- return a
+ return _
end
def from_s s
@@ -93,7 +93,7 @@ class Grammar
attr_accessor :rules, :startn, :startt, :flat
def initialize fn
- @rules = []; @startn = []; @startt = [] ;@flat = []
+ @rules = []; @startn = []; @startt = []; @flat = []
ReadFile.readlines_strip(fn).each_with_index { |s,i|
STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0
@rules << Rule.from_s(s)
diff --git a/hg.rb b/hg.rb
index 455a22b..a7d1d99 100644
--- a/hg.rb
+++ b/hg.rb
@@ -13,7 +13,6 @@ class HG::Node
def initialize id=nil, cat=nil, outgoing=[], incoming=[], score=nil
@id = id
- @cat = cat
@outgoing = outgoing
@incoming = incoming
@score = nil
@@ -46,12 +45,12 @@ class HG::Hypergraph
end
class HG::Hyperedge
- attr_accessor :head, :tails, :weight, :f, :mark, :rule
+ attr_accessor :head, :tails, :score, :f, :mark, :rule
- def initialize head=nil, tails=[], weight=0.0, f=SparseVector.new, rule=nil
+ def initialize head=nil, tails=[], score=0.0, f=SparseVector.new, rule=nil
@head = head
@tails = tails
- @weight = weight
+ @score = score
@f = f
@mark = 0
@rule = Grammar::Rule.from_s rule if rule
@@ -66,7 +65,7 @@ class HG::Hyperedge
end
def to_s
- "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, weight:#{@weight}, f:#{f.to_s}, mark:#{@mark}>"
+ "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, score:#{@score}, f:#{f.to_s}, mark:#{@mark}>"
end
end
@@ -98,7 +97,7 @@ def HG::viterbi hypergraph, root, semiring=ViterbiSemiring.new
e.tails.each { |m|
s = semiring.multiply.call(s, m.score)
}
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score))
}
}
end
@@ -114,10 +113,10 @@ def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
e.tails.each { |m|
s = semiring.multiply.call(s, m.score)
}
- if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
+ if n.score < semiring.multiply.call(s, e.score) # ViterbiSemiring add
best_edge = e
end
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score))
}
best_path << best_edge if best_edge
}
@@ -135,10 +134,10 @@ def HG::viterbi_string hypergraph, root, semiring=ViterbiSemiring.new
e.tails.each { |m|
s = semiring.multiply.call(s, m.score)
}
- if n.score < semiring.multiply.call(s, e.weight) # ViterbiSemiring add
+ if n.score < semiring.multiply.call(s, e.score) # ViterbiSemiring add
best_s = e.e
end
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.weight))
+ n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score))
}
s += best_s if best_s
}
@@ -169,14 +168,14 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
h = JSON.parse File.new(fn).read
w = SparseVector.from_h h['weights']
h['nodes'].each { |x|
- n = Node.new x['id'], x['cat']
+ n = Node.new x['id']
nodes << n
nodes_by_id[n.id] = n
}
h['edges'].each { |x|
e = Hyperedge.new(nodes_by_id[x['head']], \
x['tails'].map { |j| nodes_by_id[j] }.to_a, \
- (x['weight'] ? semiring.convert.call(x['weight'].to_f) : nil), \
+ (x['score'] ? semiring.convert.call(x['score'].to_f) : nil), \
(x['f'] ? SparseVector.from_h(x['f']) : nil), \
x['rule'])
if x['f']
diff --git a/test.rb b/test.rb
index bc2ed30..d9fbdfa 100755
--- a/test.rb
+++ b/test.rb
@@ -5,7 +5,7 @@ require_relative 'hg'
semiring = ViterbiSemiring.new
hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/json/test.json', semiring, true)
-path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
+#path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
#s = HG::derive path, path.last.rule.lhs, []
#puts "#{s.map { |i| i.word }.join ' '} ||| #{score}"
@@ -13,6 +13,6 @@ hypergraph.reset
paths = HG::all_paths hypergraph, nodes_by_id[-1]
paths.each { |p|
s = HG::derive p, p.last.rule.lhs, []
-puts "#{s.map { |i| i.word }.join ' '} ||| #{score}"
+ puts "#{s.map { |i| i.word }.join ' '}"
}