diff options
author | Patrick Simianer <p@simianer.de> | 2015-11-11 16:10:58 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-11-11 16:10:58 +0100 |
commit | 25674bcbde962f6fc27448af147b88b853a168f7 (patch) | |
tree | 4e95781a163c7790a10bb3a1af0982c2d42a7a89 /derivation_to_json/rules.rb | |
parent | fbf4cb550ebdcefc4552167e5c6938a5fce2b86d (diff) |
extract rules from post-edit alignment
Diffstat (limited to 'derivation_to_json/rules.rb')
-rwxr-xr-x | derivation_to_json/rules.rb | 42 |
1 file changed, 42 insertions, 0 deletions
#!/usr/bin/env ruby
# Reconstructed from the diff of derivation_to_json/rules.rb
# (new file, mode 100755, blob b0d267b).
#
# For every contiguous sub-span of the source segmentation below, prints the
# sub-span itself followed by all variants in which one contiguous run of its
# segments is replaced by a single "[X]" placeholder.

require 'zipf'

# Example data: source-side segments.
src = ['Synergistische', 'pharmazeutische Zusammensetzung enthaltend', 'ein Peptid', 'mit 2 bis 5', 'Aminosaeuren']
# Target-side segments; not read by the code below.
# NOTE(review): "amino acis" looks like a typo for "amino acids" -- confirm before changing the data.
target = ["A", "synergistic", "pharmaceutical composition containing", "a peptide", "with 2 to 5", "amino acis"]
# Not read by the code below.
# presumably one list of target indices per source segment -- TODO confirm
align = [[1], [2], [0,3], [4], [5]]

# Builds all variants of +a+ that contain at most one "[X]" placeholder.
#
# The result always starts with +a+ itself (the same object, not a copy).
# If +a+ has fewer than two elements nothing else is added. Otherwise, for
# every start index and every run length that still leaves at least one
# original element in place, a new array is appended in which that run is
# replaced by a single "[X]". Variants are ordered by start index, then by
# increasing run length.
#
# @param a [Array] sequence of segments
# @return [Array<Array>] +a+ followed by its single-placeholder variants
def single_nt(a)
  r = [a]
  return r if a.size < 2

  a.each_index do |i|
    # A run may reach the end of the array, but must not swallow all of it:
    # every variant keeps at least two elements ("[X]" plus one original).
    longest = [a.size - i, a.size - 1].min
    1.upto(longest) do |len|
      # a[n..-1] with n == a.size yields [], so a run ending at the last
      # element needs no special case.
      r << a[0...i] + ['[X]'] + a[(i + len)..-1]
    end
  end

  r
end

# Walk over every contiguous sub-span src[first..last] and print each of its
# variants on its own line, in Array#to_s form.
src.each_index do |first|
  (first...src.size).each do |last|
    single_nt(src[first..last]).each { |pattern| puts pattern.to_s }
  end
end