1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#!/usr/bin/env ruby
require 'json'
require 'zipf'
before = JSON.parse(ReadFile.read('in7.json'))
after = JSON.parse(ReadFile.read('out7.json'))
alignment = {}
after["align"].each { |i|
a,b = i.split '-'
a = a.to_i
b = b.to_i
if alignment[a]
alignment[a] << b
else
alignment[a] = [b]
end
}
srg2idx = {}
before['source_rgroups'].uniq.each { |k|
srg2idx[k] = []
before['source_rgroups'].each_with_index { |i,j|
if i==k
srg2idx[k] << j
end
}
}
def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false
a = []
tgt = []
target_phrases = [] # alignment seen from target
v.each { |i|
a << after["source"][i]
target_phrases << alignment[i].first if alignment[i]
}
target_phrases.sort! if !dontsort
target_phrases.each { |j|
tgt << after["target"][j]
}
return a, tgt, target_phrases
end
# k is a rule id in after['rules_by_span_id']
srg2idx.each_pair { |k,v|
a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v
rule_before = before['rules_by_span_id'][k.to_s]
src_side_before = splitpipe(rule_before)[1]
x = src_side_before.split
a.first.insert(0, " [X] ") if x[0] == "[X]"
a[a.size-1] += " [X] " if x[x.size-1] == "[X]"
puts rule_before
puts "#{k} #{a.join " [X] "}"
puts tgt.to_s
puts before["span_info"][k.to_s].to_s
puts "target phrases #{target_phrases}"
s = ""
target_phrases.uniq.each { |j| s += after["target"][j]+" " }
puts "S: #{s}"
puts "nothing to do" if before["span_info"][k.to_s][1].size==0
target_phrase_sub = []
before["span_info"][k.to_s][1].each { |subspan|
puts subspan.to_s
subid = before["span2id"][subspan.to_s]
puts "subid #{subid}"
puts "XXX #{srg2idx[subid]}"
_, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true
target_phrase_sub << tp
}
puts "targ ph sub #{target_phrase_sub.to_s}"
puts "---"
puts
}
|