summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-07 11:44:02 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-07 11:44:02 +0200
commit 24e296e97c32fdf6c3b7fd5ecb5596165d4dad14 (patch)
tree 6d472fd5f46d359beddc9169a95deacfd4986ff4
parent bc71d67647ac9b124cc666a43fc819cad20cfe06 (diff)
better json format, class hierarchy untangled
-rw-r--r-- example/json/test.json 44
-rw-r--r-- grammar.rb 30
-rw-r--r-- hg.rb 78
-rwxr-xr-x test.rb 9
-rwxr-xr-x util/cdec_hg_to_json.py 30
5 files changed, 81 insertions, 110 deletions
diff --git a/example/json/test.json b/example/json/test.json
index aa0b45e..d865fa0 100644
--- a/example/json/test.json
+++ b/example/json/test.json
@@ -1,33 +1,33 @@
{
"weights":{
-"PhraseModel_0":0.0, "PhraseModel_1":0.0, "PhraseModel_2":0.0, "PhraseModel_3":0.0, "PhraseModel_4":0.0, "PhraseModel_5":0.0, "PhraseModel_6":0.0, "PhraseModel_7":0.0, "PhraseModel_8":0.0, "PhraseModel_9":0.0, "PhraseModel_10":0.0, "PhraseModel_11":0.0, "PhraseModel_12":0.0, "PhraseModel_13":0.0, "PhraseModel_14":0.0, "PhraseModel_15":0.0, "PhraseModel_16":0.0, "PhraseModel_17":0.0, "PhraseModel_18":0.0, "PhraseModel_19":0.0, "PhraseModel_20":0.0, "PhraseModel_21":0.0, "PhraseModel_22":0.0, "PhraseModel_23":0.0, "PhraseModel_24":0.0, "PhraseModel_25":0.0, "PhraseModel_26":0.0, "PhraseModel_27":0.0, "PhraseModel_28":0.0, "PhraseModel_29":0.0, "PhraseModel_30":0.0, "PhraseModel_31":0.0, "PhraseModel_32":0.0, "PhraseModel_33":0.0, "PhraseModel_34":0.0, "PhraseModel_35":0.0, "PhraseModel_36":0.0, "PhraseModel_37":0.0, "PhraseModel_38":0.0, "PhraseModel_39":0.0, "PhraseModel_40":0.0, "PhraseModel_41":0.0, "PhraseModel_42":0.0, "PhraseModel_43":0.0, "PhraseModel_44":0.0, "PhraseModel_45":0.0, "PhraseModel_46":0.0, "PhraseModel_47":0.0, "PhraseModel_48":0.0, "PhraseModel_49":0.0, "PhraseModel_50":0.0, "PhraseModel_51":0.0, "PhraseModel_52":0.0, "PhraseModel_53":0.0, "PhraseModel_54":0.0, "PhraseModel_55":0.0, "PhraseModel_56":0.0, "PhraseModel_57":0.0, "PhraseModel_58":0.0, "PhraseModel_59":0.0, "PhraseModel_60":0.0, "PhraseModel_61":0.0, "PhraseModel_62":0.0, "PhraseModel_63":0.0, "PhraseModel_64":0.0, "PhraseModel_65":0.0, "PhraseModel_66":0.0, "PhraseModel_67":0.0, "PhraseModel_68":0.0, "PhraseModel_69":0.0, "PhraseModel_70":0.0, "PhraseModel_71":0.0, "PhraseModel_72":0.0, "PhraseModel_73":0.0, "PhraseModel_74":0.0, "PhraseModel_75":0.0, "PhraseModel_76":0.0, "PhraseModel_77":0.0, "PhraseModel_78":0.0, "PhraseModel_79":0.0, "PhraseModel_80":0.0, "PhraseModel_81":0.0, "PhraseModel_82":0.0, "PhraseModel_83":0.0, "PhraseModel_84":0.0, "PhraseModel_85":0.0, "PhraseModel_86":0.0, "PhraseModel_87":0.0, "PhraseModel_88":0.0, "PhraseModel_89":0.0, "PhraseModel_90":0.0, 
"PhraseModel_91":0.0, "PhraseModel_92":0.0, "PhraseModel_93":0.0, "PhraseModel_94":0.0, "PhraseModel_95":0.0, "PhraseModel_96":0.0, "PhraseModel_97":0.0, "PhraseModel_98":0.0, "PhraseModel_99":0.0, "logp":2.0, "use_i":0.0, "use_a":0.0, "use_house":15.0, "use_shell":1.0
+"logp":2.0, "use_shell":1.0
},
"nodes":
[
-{ "id":-1 },
-{ "id":0 },
-{ "id":1 },
-{ "id":2 },
-{ "id":3 },
-{ "id":4 },
-{ "id":5 },
-{ "id":6 },
-{ "id":7 }
+{ "id":-1, "cat":"root", "span":[-1,-1] },
+{ "id":0, "cat":"NP", "span":[0,1] },
+{ "id":1, "cat":"V", "span":[1,2] },
+{ "id":2, "cat":"JJ", "span":[3,4] },
+{ "id":3, "cat":"NN", "span":[3,5] },
+{ "id":4, "cat":"NP", "span":[2,5] },
+{ "id":5, "cat":"VP", "span":[1,5] },
+{ "id":6, "cat":"S", "span":[0,5] },
+{ "id":7, "cat":"Goal", "span":[0,5] }
],
"edges":
[
-{"head":0, "rule":"[NP@0:1] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0} },
-{"head":1, "rule":"[V@1:2] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0} },
-{"head":2, "rule":"[JJ@3:4] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0} },
-{"head":2, "rule":"[JJ@3:4] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0} },
-{"head":3, "rule":"[NN@3:5] ||| [JJ@3:4,1] haus ||| [JJ@3:4,1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0} },
-{"head":4, "rule":"[NP@2:5] ||| ein [NN@3:5,1] ||| a [NN@3:5,1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0} },
-{"head":5, "rule":"[VP@1:5] ||| [V@1:2,1] [NP@2:5,2] ||| [V@1:2,1] [NP@2:5,2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0} },
-{"head":6, "rule":"[S@0:5] ||| [NP@0:1,1] [VP@1:5,2] ||| [NP@0:1,1] [VP@1:5,2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0} },
-{"head":7, "rule":"[Goal@0:5] ||| [S@0:5,1] ||| [S@0:5,1] ||| ", "tails":[ 6 ], "f":{} }
+{"head":0, "rule":"[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "tails":[ -1 ], "f":{"logp":-0.5, "use_i":1.0}, "weight":0.367879441171 },
+{"head":1, "rule":"[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "tails":[ -1 ], "f":{"logp":-0.25, "use_saw":1.0}, "weight":0.606530659713 },
+{"head":2, "rule":"[JJ] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_small":1.0}, "weight":1.0 },
+{"head":2, "rule":"[JJ] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_little":1.0}, "weight":1.0 },
+{"head":3, "rule":"[NN] ||| kleines haus ||| small house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
+{"head":3, "rule":"[NN] ||| kleines haus ||| little house ||| logp=0.0 use_house=1.0", "tails":[ -1 ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
+{"head":3, "rule":"[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0.0 use_house=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
+{"head":3, "rule":"[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0.0 use_shell=1.0", "tails":[ 2 ], "f":{"logp":0.0, "use_shell":1.0}, "weight":2.71828182846 },
+{"head":4, "rule":"[NP] ||| ein [NN,1] ||| a [1] ||| logp=0.0 use_a=1.0", "tails":[ 3 ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 },
+{"head":5, "rule":"[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0.0", "tails":[ 1,4 ], "f":{"logp":0.0}, "weight":1.0 },
+{"head":6, "rule":"[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0.0", "tails":[ 0,5 ], "f":{"logp":0.0}, "weight":1.0 },
+{"head":7, "rule":"[Goal] ||| [S,1] ||| [1] ||| ", "tails":[ 6 ], "f":{}, "weight":1.0 }
]
}
diff --git a/grammar.rb b/grammar.rb
index 70e8fda..003512c 100644
--- a/grammar.rb
+++ b/grammar.rb
@@ -13,23 +13,17 @@ class T
end
class NT
- attr_accessor :symbol, :index, :left, :right
+ attr_accessor :symbol, :index
- def initialize symbol=nil, index=nil, left=nil, right=nil
+ def initialize symbol=nil, index=nil
@symbol = symbol
@index = index
- @left = left
- @right = right
end
def from_s s
- s.delete! '[]'
- @symbol, meta = s.split '@'
- if meta
- span, index = meta.split ','
- @left, @right = span.split(':').map { |x| x.to_i }
- @index = index.to_i
- end
+ @symbol, @index = s.delete('[]').split ','
+ @symbol.strip!
+ @index = @index.to_i-1
end
def self.from_s s
@@ -39,22 +33,23 @@ class NT
end
def to_s
- "NT(#{@left},#{@right})<#{@symbol},#{@index}>"
+ "NT<#{@symbol},#{@index}>"
end
end
class Rule
attr_accessor :lhs, :rhs, :target, :map
- def initialize lhs=nil, rhs=[], target=[]
+ def initialize lhs=nil, rhs=nil, target=nil, map=nil
@lhs = lhs
@rhs = rhs
@target = target
+ @map = (map ? map : [])
@arity_ = nil
end
def to_s
- "#{@lhs} -> #{@rhs.map{ |i| i.to_s }.join ' '} ||| #{@target.map{ |i| i.to_s }.join ' '} [arity=#{arity}]"
+ "#{@lhs.to_s} -> #{@rhs.map{ |i| i.to_s }.join ' '} ||| #{@target.map{ |i| i.to_s }.join ' '} [arity=#{arity}]"
end
def arity
@@ -62,12 +57,13 @@ class Rule
return @arity_
end
- def read_right_ s
+ def read_right_ s, fill_map=false
_ = []
s.split.each { |x|
x.strip!
if x[0]=='[' && x[x.size-1] == ']'
_ << NT.from_s(x)
+ @map << _.last.index if fill_map
else
_ << T.new(x)
end
@@ -79,10 +75,10 @@ class Rule
lhs, rhs, target = splitpipe s, 3
@lhs = NT.from_s lhs
@rhs = read_right_ rhs
- @target = read_right_ target
+ @target = read_right_ target, true
end
- def self.from_s s
+ def self.from_s_x s
r = self.new
r.from_s s
return r
diff --git a/hg.rb b/hg.rb
index f6af75d..43dbf79 100644
--- a/hg.rb
+++ b/hg.rb
@@ -8,17 +8,20 @@ module HG
class HG::Node
- attr_accessor :id, :outgoing, :incoming, :score
+ attr_accessor :id, :symbol, :left, :right, :outgoing, :incoming, :score
- def initialize id=nil, cat=nil, outgoing=[], incoming=[], score=nil
- @id = id
+ def initialize id=nil, symbol='', span=[-1,-1], outgoing=[], incoming=[], score=nil
+ @id = id
+ @symbol = symbol
+ @left = span[0]
+ @right = span[1]
@outgoing = outgoing
@incoming = incoming
- @score = nil
+ @score = score
end
def to_s
- "Node<id:#{@id}, outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>"
+ "Node<id=#{@id}, symbol='#{symbol}', span=(#{@left},#{@right}), outgoing:#{@outgoing.size}, incoming:#{@incoming.size}>"
end
end
@@ -41,20 +44,20 @@ class HG::Hypergraph
end
def to_s
- "Hypergraph<nodes:#{@nodes.size}, edges:#{@edges.size}, arity:#{arity}>"
+ "Hypergraph<nodes:#{@nodes.size}, edges:#{@edges.size}, arity=#{arity}>"
end
end
class HG::Hyperedge
attr_accessor :head, :tails, :score, :f, :mark, :rule
- def initialize head=nil, tails=[], score=0.0, f=SparseVector.new, rule=nil
+ def initialize head=Node.new, tails=[], score=0.0, f=SparseVector.new, rule=nil
@head = head
@tails = tails
@score = score
@f = f
@mark = 0
- @rule = (rule ? Grammar::Rule.from_s(rule) : nil)
+ @rule = (rule.class==String ? Grammar::Rule.from_s(rule) : rule)
end
def arity
@@ -66,7 +69,7 @@ class HG::Hyperedge
end
def to_s
- "Hyperedge<head:\"#{@head.id}\", rule:\"#{@rule.to_s}, \"tails:#{@tails.map{|n|n.id}}, arity:#{arity}, score:#{@score}, f:#{f.to_s}, mark:#{@mark}>"
+ "Hyperedge<head=#{@head.id}, rule:'#{@rule.to_s}', tails=#{@tails.map{|n|n.id}}, arity=#{arity}, score=#{@score}, f=#{f.to_s}, mark=#{@mark}>"
end
end
@@ -124,27 +127,6 @@ def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
return best_path, toposorted.last.score
end
-def HG::viterbi_string hypergraph, root, semiring=ViterbiSemiring.new
- toposorted = topological_sort hypergraph.nodes
- init toposorted, semiring, root
- s = ''
- toposorted.each { |n|
- best_s = nil
- n.incoming.each { |e|
- s = semiring.one
- e.tails.each { |m|
- s = semiring.multiply.call(s, m.score)
- }
- if n.score < semiring.multiply.call(s, e.score) # ViterbiSemiring add
- best_s = e.e
- end
- n.score = semiring.add.call(n.score, semiring.multiply.call(s, e.score))
- }
- s += best_s if best_s
- }
- return s, toposorted.last.score
-end
-
def HG::all_paths hypergraph, root
toposorted = topological_sort hypergraph.nodes
paths = [[]]
@@ -162,6 +144,22 @@ def HG::all_paths hypergraph, root
return paths
end
+def HG::derive path, cur, carry
+ edge = path.select { |e| e.head.symbol==cur.symbol \
+ && e.head.left==cur.left \
+ && e.head.right==cur.right }.first
+ j = 0
+ edge.rule.target.each { |i|
+ if i.class == Grammar::NT
+ derive path, edge.tails[j], carry
+ j += 1
+ else
+ carry << i
+ end
+ }
+ return carry
+end
+
def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=false
nodes = []
edges = []
@@ -169,7 +167,7 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
h = JSON.parse File.new(fn).read
w = SparseVector.from_h h['weights']
h['nodes'].each { |x|
- n = Node.new x['id']
+ n = Node.new x['id'], x['symbol'], x['span']
nodes << n
nodes_by_id[n.id] = n
}
@@ -181,9 +179,9 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
x['rule'])
if x['f']
if log_weights
- e.weight = Math.exp(w.dot(e.f))
+ e.score = Math.exp(w.dot(e.f))
else
- e.weight = w.dot(e.f)
+ e.score = w.dot(e.f)
end
end
e.tails.each { |m|
@@ -195,20 +193,6 @@ def HG::read_hypergraph_from_json fn, semiring=RealSemiring.new, log_weights=fal
return Hypergraph.new(nodes, edges), nodes_by_id
end
-def HG::derive path, cur, carry
- edge = path.select { |e| e.rule.lhs.symbol==cur.symbol \
- && e.rule.lhs.left==cur.left \
- && e.rule.lhs.right==cur.right }.first
- edge.rule.target.each { |i|
- if i.class == Grammar::NT
- derive path, i, carry
- else
- carry << i
- end
- }
- return carry
-end
-
end #module
diff --git a/test.rb b/test.rb
index d9fbdfa..e65c9b3 100755
--- a/test.rb
+++ b/test.rb
@@ -5,14 +5,15 @@ require_relative 'hg'
semiring = ViterbiSemiring.new
hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/json/test.json', semiring, true)
-#path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
-#s = HG::derive path, path.last.rule.lhs, []
-#puts "#{s.map { |i| i.word }.join ' '} ||| #{score}"
+path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
+s = HG::derive path, path.last.head, []
+puts "#{s.map { |i| i.word }.join ' '}"
+puts
hypergraph.reset
paths = HG::all_paths hypergraph, nodes_by_id[-1]
paths.each { |p|
- s = HG::derive p, p.last.rule.lhs, []
+ s = HG::derive p, p.last.head, []
puts "#{s.map { |i| i.word }.join ' '}"
}
diff --git a/util/cdec_hg_to_json.py b/util/cdec_hg_to_json.py
index 4e407c8..2fcc409 100755
--- a/util/cdec_hg_to_json.py
+++ b/util/cdec_hg_to_json.py
@@ -13,15 +13,16 @@ def hg2json(hg, weights):
res += '"weights":{'+"\n"
a = []
for i in weights:
- a.append( '"%s":%s'%(i[0], i[1]) )
+ if i[1] != 0:
+ a.append( '"%s":%s'%(i[0], i[1]) )
res += ", ".join(a)+"\n"
res += "},\n"
res += '"nodes":'+"\n"
res += "[\n"
a = []
- a.append( '{ "label":"root", "cat":"root" }' )
+ a.append( '{ "id":-1, "cat":"root", "span":[-1,-1] }' )
for i in hg.nodes:
- a.append( '{ "label":"%s", "cat":"%s", "left":%d, "right":%d }'%(i.id, i.cat, i.span[0], i.span[1]) )
+ a.append('{ "id":%d, "cat":"%s", "span":[%d,%d] }'%(i.id, i.cat, i.span[0], i.span[1]))
res += ",\n".join(a)+"\n"
res += "],\n"
res += '"edges":'+"\n"
@@ -29,31 +30,20 @@ def hg2json(hg, weights):
a = []
for i in hg.edges:
s = "{"
- s += '"head":"%s"'%(i.head_node.id)
+ s += '"head":%d'%(i.head_node.id)
s += ', "rule":"%s"'%(i.trule)
- s += ', "left":%d'%(i.span[0])
- s += ', "right":%d'%(i.span[1])
- #s += ', "leftx":%d'%(i.src_span[0])
- #s += ', "rightx":%d'%(i.src_span[1])
- s += ', "spans":"'
- q = 0
- for z in i.tail_nodes:
- s+= "%s|||%d|||(%d,%d);"%(z.cat, q, z.span[0], z.span[1])
- q += 1
- s += '"'
+ # f
xs = ' "f":{'
b = []
for j in i.feature_values:
b.append( '"%s":%s'%(j[0], j[1]) )
xs += ", ".join(b)
xs += "},"
- c = []
- for j in i.tail_nodes:
- c.append( '"'+str(j.id)+'"' )
- if len(c) > 0:
- s += ', "tails":[ %s ],'%(",".join(c))
+ # tails
+ if len(list(i.tail_nodes)) > 0:
+ s += ', "tails":[ %s ],'%(",".join([str(n.id) for n in i.tail_nodes]))
else:
- s += ', "tails":[ "root" ],'
+ s += ', "tails":[ -1 ],'
s += xs
s += ' "weight":%s }'%(i.prob)
a.append(s)