# overlapping_rules/util.rb
#
# Provenance: commit 26c490f404731d053a6205719b6246502c07b449 ("init"),
# Patrick Simianer, Sat, 14 Jun 2014 16:46:27 +0200.
#
# Utilities for parsing rule lines of the form
#   nt ||| f ||| e ||| name=value ... ||| alignment
# and for streaming such rules from tab-separated input.

# One parsed rule.
#
# Fields (all exposed through accessors):
#   nt        - first field, stripped (e.g. "[X]")
#   f, e      - second / third field, split on whitespace into token arrays
#               (presumably the source and target side -- confirm with callers)
#   features  - Hash of feature name => Float
#   alignment - fifth field, stripped; '' when the line carries none
#   id        - numeric id supplied by the caller
class Rule
  attr_accessor :nt, :f, :e, :features, :alignment, :id

  # True if +token+ contains an indexed non-terminal such as "[X,1]".
  # The pattern is the one used throughout this file: a single digit, not
  # anchored. Non-string input (e.g. nil from an empty token list) is simply
  # "not a non-terminal" instead of an error.
  def self.nonterminal?(token)
    token.is_a?(String) && !(token =~ /\[X,\d\]/).nil?
  end

  # Parses +s+ into a rule. With s == nil an empty rule is created and every
  # field (including id) stays nil, so it can be filled in via the accessors.
  #
  # The feature and alignment fields are optional; a line needs at least the
  # nt, f and e fields.
  #
  # Raises ArgumentError if fewer than three ' ||| '-separated fields exist.
  def initialize(s=nil, id=-1)
    return unless s

    fields = s.strip.split(' ||| ')
    if fields.size < 3
      raise ArgumentError, "malformed rule (need at least 'nt ||| f ||| e'): #{s.inspect}"
    end

    @id = id
    @nt = fields[0].strip
    @f = fields[1].split
    @e = fields[2].split
    @features = {}
    # A feature without '=value' ends up as 0.0 (nil.to_f), as before.
    (fields[3] || '').split.each do |pair|
      name, value = pair.split('=')
      @features[name] = value.to_f
    end
    # Rules without an alignment field used to crash on nil.strip.
    @alignment = fields[4] ? fields[4].strip : ''
  end

  # Serialises the rule back into the ' ||| '-separated line format.
  # Feature values are printed as Floats, so "b=2" round-trips as "b=2.0".
  def to_s
    feature_string = (@features || {}).map { |name, value| "#{name}=#{value}" }.join(' ')
    "#{@nt} ||| #{@f.join ' '} ||| #{@e.join ' '} ||| #{feature_string} ||| #{@alignment}"
  end

  # Key identifying the rule by its f and e token sequences only.
  def rule_id_string
    "#{@f.join '_'}|||#{@e.join '_'}"
  end

  # All distinct pairings of a terminal from f with a terminal from e.
  # Each pair is sorted before joining, so the result does not record which
  # side a word came from. Order follows f (outer) then e (inner).
  def fe_word_pairs
    f_terminals = @f.reject { |token| Rule.nonterminal?(token) }
    e_terminals = @e.reject { |token| Rule.nonterminal?(token) }
    f_terminals.product(e_terminals).map { |pair| pair.sort.join('|||') }.uniq # we do not want duplicates
  end
end


# Behaviour this file adds to the core Range class: a mutable from/to pair.
#
# NOTE(review): the original reopened ::Range and *replaced* its initialize
# and to_s, which broke every built-in range (Range.new(1, 3) raised, and
# (1..3).to_s returned "--"). Prepending a module keeps `Range.new` (no
# arguments), #from, #to, #to_s and #correct working exactly as before while
# falling through to the built-in behaviour for ordinary ranges. A dedicated
# class would be cleaner but would change the name callers use.
module RangeSpan
  attr_accessor :from, :to

  # No arguments: create an empty from/to span. Otherwise: built-in Range.
  def initialize(*args)
    return super unless args.empty?

    @from = nil
    @to = nil
  end

  # "from--to" for spans created by the zero-argument constructor,
  # the regular Range#to_s for everything else.
  def to_s
    return super unless instance_variable_defined?(:@from)

    "#{@from}--#{@to}"
  end

  # Replaces the span with (n - to, n - from); both must already be numeric.
  def correct(n)
    previous_from = @from
    @from = n - @to
    @to = n - previous_from
  end
end

class Range
  prepend RangeSpan
end


# True if +rule+ should be skipped: its f side or its e side both starts and
# ends with a non-terminal. Empty or nil sides never match (they used to
# raise NoMethodError on nil).
def ignore(rule)
  nonterminal_at_both_ends = lambda do |tokens|
    tokens = Array(tokens)
    Rule.nonterminal?(tokens.first) && Rule.nonterminal?(tokens.last)
  end
  nonterminal_at_both_ends.call(rule.f) || nonterminal_at_both_ends.call(rule.e)
end


# Shared line loop of the readers below: yields the first two tab-separated
# columns of every non-blank line of +f+ and always closes +f+ afterwards,
# also when the block raises or the caller breaks out early.
#
# Raises ArgumentError for a non-blank line without a tab.
def each_rule_record(f)
  while (line = f.gets)
    next if line.strip.empty?

    key, payload = line.split("\t")
    raise ArgumentError, "malformed line (no tab separator): #{line.inspect}" if payload.nil?

    yield key, payload
  end
ensure
  f.close
end


# Reads "id<TAB>rule" lines from +f+ and returns the rules not rejected by
# #ignore. If +ids+ is given, ids[rule.rule_id_string] is set to true for
# every kept rule. Status lines go to STDERR; +fn+ is only used in them.
# The counter in the status line counts kept rules.
def read_rules_from_file(f, fn, ids=nil)
  STDERR.puts "reporter:status:reading rules from #{fn}"
  rules = []
  each_rule_record(f) do |id, data|
    rule = Rule.new(data, id.to_i)
    next if ignore(rule)

    rules << rule
    ids[rule.rule_id_string] = true if ids
    if (rules.size % 10).zero?
      STDERR.puts "reporter:status:reading rules from #{fn} (already read #{rules.size} lines)"
    end
  end
  rules
end


# Streaming variant of read_rules_from_file: yields each kept rule instead
# of collecting them. +fn+ is unused and kept for signature compatibility.
def read_rules_from_file1(f, fn, ids=nil)
  each_rule_record(f) do |id, data|
    rule = Rule.new(data, id.to_i)
    next if ignore(rule)

    yield rule
    ids[rule.rule_id_string] = true if ids
  end
end


# Streaming reader for "key<TAB>id ||| rule" lines: yields the key (first
# column) together with each kept rule. +fn+ is unused and kept for signature
# compatibility.
#
# Raises ArgumentError if the second column has no ' ||| ' after the id.
def read_rules_from_file2(f, fn, ids=nil)
  each_rule_record(f) do |word_pair_key, data|
    id, rule_str = data.split(' ||| ', 2)
    raise ArgumentError, "malformed record (expected 'id ||| rule'): #{data.inspect}" if rule_str.nil?

    rule = Rule.new(rule_str, id.to_i)
    next if ignore(rule) # prevent overhead later on

    yield word_pair_key, rule
    ids[rule.rule_id_string] = true if ids
  end
end