diff options
Diffstat (limited to 'derivation_to_json/rec.rb')
-rwxr-xr-x | derivation_to_json/rec.rb | 79 |
1 files changed, 0 insertions, 79 deletions
diff --git a/derivation_to_json/rec.rb b/derivation_to_json/rec.rb deleted file mode 100755 index 84bdc0d..0000000 --- a/derivation_to_json/rec.rb +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env ruby - -require 'json' -require 'zipf' - - -before = JSON.parse(ReadFile.read('in7.json')) -after = JSON.parse(ReadFile.read('out7.json')) - -alignment = {} -after["align"].each { |i| - a,b = i.split '-' - a = a.to_i - b = b.to_i - if alignment[a] - alignment[a] << b - else - alignment[a] = [b] - end -} - -srg2idx = {} -before['source_rgroups'].uniq.each { |k| - srg2idx[k] = [] - before['source_rgroups'].each_with_index { |i,j| - if i==k - srg2idx[k] << j - end - } -} - -def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false - a = [] - tgt = [] - target_phrases = [] # alignment seen from target - v.each { |i| - a << after["source"][i] - target_phrases << alignment[i].first if alignment[i] - } - target_phrases.sort! if !dontsort - target_phrases.each { |j| - tgt << after["target"][j] - } - - return a, tgt, target_phrases -end - - -# k is a rule id in after['rules_by_span_id'] -srg2idx.each_pair { |k,v| - a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v - rule_before = before['rules_by_span_id'][k.to_s] - src_side_before = splitpipe(rule_before)[1] - x = src_side_before.split - a.first.insert(0, " [X] ") if x[0] == "[X]" - a[a.size-1] += " [X] " if x[x.size-1] == "[X]" - puts rule_before - puts "#{k} #{a.join " [X] "}" - puts tgt.to_s - puts before["span_info"][k.to_s].to_s - puts "target phrases #{target_phrases}" - s = "" - target_phrases.uniq.each { |j| s += after["target"][j]+" " } - puts "S: #{s}" - puts "nothing to do" if before["span_info"][k.to_s][1].size==0 - target_phrase_sub = [] - before["span_info"][k.to_s][1].each { |subspan| - puts subspan.to_s - subid = before["span2id"][subspan.to_s] - puts "subid #{subid}" - puts "XXX #{srg2idx[subid]}" - _, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true - target_phrase_sub << tp - } - puts "targ ph sub #{target_phrase_sub.to_s}" - puts "---" - puts -} - |