diff options
author | Patrick Simianer <p@simianer.de> | 2015-11-11 16:10:58 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-11-11 16:10:58 +0100 |
commit | 25674bcbde962f6fc27448af147b88b853a168f7 (patch) | |
tree | 4e95781a163c7790a10bb3a1af0982c2d42a7a89 /derivation_to_json/rec.rb | |
parent | fbf4cb550ebdcefc4552167e5c6938a5fce2b86d (diff) |
extract rules from post-edit alignment
Diffstat (limited to 'derivation_to_json/rec.rb')
-rwxr-xr-x | derivation_to_json/rec.rb | 50 |
1 file changed, 50 insertions, 0 deletions
#!/usr/bin/env ruby
#
# derivation_to_json/rec.rb -- extract rules from post-edit alignment.
#
# Reads two JSON documents from the current working directory:
#   x.json  the "before" data; uses the keys 'source_rgroups' and
#           'rules_by_span_id'.
#   y.json  the "after" data; uses the keys 'source', 'target' and 'align'.
#
# For every distinct value in 'source_rgroups' the script prints:
#   1. the rule stored under that id in 'rules_by_span_id',
#   2. the id followed by the group's source tokens joined with " [X] ",
#   3. the target tokens aligned to those source tokens,
#   4. a "---" separator and an empty line.

require 'json'
require 'zipf' # provides ReadFile and splitpipe

before = JSON.parse(ReadFile.read('x.json'))
after = JSON.parse(ReadFile.read('y.json'))

# Map each source index to the list of target indices aligned to it.
# Every entry of 'align' has the form "<source index>-<target index>".
alignment = {}
after['align'].each do |pair|
  src, tgt = pair.split('-')
  (alignment[src.to_i] ||= []) << tgt.to_i
end

# Map each rule-group id to the (ascending) positions at which it occurs in
# 'source_rgroups'. A single pass keeps the ids in order of first appearance.
srg2idx = {}
before['source_rgroups'].each_with_index do |group, position|
  (srg2idx[group] ||= []) << position
end

srg2idx.each_pair do |group, positions|
  src_tokens = positions.map { |i| after['source'][i] }

  # Only the first target aligned to each source token is used. Source tokens
  # without any alignment are skipped; indexing them used to raise
  # NoMethodError on nil.
  tgt_tokens = positions
               .select { |i| alignment.key?(i) }
               .map { |i| after['target'][alignment[i].first] }

  rule_before = before['rules_by_span_id'][group.to_s]
  # NOTE(review): splitpipe comes from zipf; field 1 is presumably the source
  # side of a "|||"-separated rule -- confirm against the gem.
  src_side_before = splitpipe(rule_before)[1].split

  # Mirror a leading/trailing "[X]" of the original rule's source side.
  # New strings are built so the parsed JSON data is not modified in place.
  src_tokens[0] = ' [X] ' + src_tokens[0] if src_side_before.first == '[X]'
  src_tokens[-1] += ' [X] ' if src_side_before.last == '[X]'

  puts rule_before
  puts "#{group} #{src_tokens.join(' [X] ')}"
  puts tgt_tokens.to_s
  puts '---'
  puts
end