summaryrefslogtreecommitdiff
path: root/overlapping_rules/util.rb
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer: Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit: 26c490f404731d053a6205719b6246502c07b449 (patch)
tree: 3aa721098f1251dfbf2249ecd2736434c13b1d48 /overlapping_rules/util.rb
init
Diffstat (limited to 'overlapping_rules/util.rb')
-rw-r--r-- overlapping_rules/util.rb 116
1 files changed, 116 insertions, 0 deletions
diff --git a/overlapping_rules/util.rb b/overlapping_rules/util.rb
new file mode 100644
index 0000000..5f1249a
--- /dev/null
+++ b/overlapping_rules/util.rb
@@ -0,0 +1,116 @@
# A single grammar rule parsed from a ' ||| '-separated line of the form
#
#   nt ||| source tokens ||| target tokens ||| name=value ... ||| alignment
class Rule
  attr_accessor :nt, :f, :e, :features, :alignment, :id

  # Tokens containing this pattern (e.g. "[X,1]") are left out of word pairs.
  NONTERMINAL = /\[X,\d\]/

  # Field separator: ' ||| ', or a final ' |||' followed only by whitespace,
  # so a line ending in an empty alignment field still splits cleanly.
  FIELD_SEPARATOR = / \|\|\|(?: |\s*\z)/

  # Parses the rule line +s+.
  #
  # s  - rule line; when nil/false nothing is parsed and every attribute
  #      (including id) stays nil.
  # id - identifier stored with the rule (default -1).
  #
  # Missing trailing fields are tolerated: absent features give {}, an
  # absent alignment gives ''. A feature without '=value' gets 0.0.
  def initialize(s=nil, id=-1)
    return unless s

    @id = id
    fields = s.split(FIELD_SEPARATOR).map { |field| field.strip }
    @nt = fields[0].to_s
    @f = fields[1].to_s.split
    @e = fields[2].to_s.split
    @features = {}
    fields[3].to_s.split.each do |pair|
      name, value = pair.split('=')
      @features[name] = value.to_f
    end
    @alignment = fields[4].to_s
  end

  # Serializes the rule back into the ' ||| '-separated line format.
  # Works on a rule built without a string too (all fields render empty).
  def to_s
    feature_string = (@features || {}).map { |name, value| "#{name}=#{value}" }.join(' ')
    "#{@nt} ||| #{Array(@f).join(' ')} ||| #{Array(@e).join(' ')} ||| #{feature_string} ||| #{@alignment}"
  end

  # Key built from both sides: source tokens joined by '_', then '|||',
  # then target tokens joined by '_'.
  def rule_id_string
    "#{Array(@f).join('_')}|||#{Array(@e).join('_')}"
  end

  # Returns the unique "a|||b" keys for every (source word, target word)
  # combination, skipping tokens that contain a nonterminal marker.
  # Each pair is sorted, so the key does not depend on which side a word
  # came from.
  def fe_word_pairs
    source_words = Array(@f).reject { |token| token =~ NONTERMINAL }
    target_words = Array(@e).reject { |token| token =~ NONTERMINAL }
    pairs = source_words.product(target_words).map { |pair| pair.sort.join('|||') }
    pairs.uniq # we do not want duplicates
  end
end
+
+
# Mutable span with a +from+ and a +to+ bound.
#
# NOTE(review): at top level `class Range` reopens Ruby's built-in Range
# instead of defining a new class, so the initialize and to_s below replace
# the core definitions (e.g. for (1..3).to_s) -- confirm this is intended.
class Range
  attr_accessor :from, :to

  # Starts with both bounds unset.
  def initialize
    @from = @to = nil
  end

  # Renders the span as "from--to".
  def to_s
    "#{from}--#{to}"
  end

  # Replaces the bounds with (n - to, n - from) and returns the new +to+.
  def correct(n)
    previous_from = from
    self.from = n - to
    self.to = n - previous_from
  end
end
+
+
# Decides whether a rule should be skipped.
#
# rule - object responding to #f and #e (arrays of source/target tokens).
#
# Returns true when the first and the last token of the source side both
# contain a nonterminal marker such as "[X,1]", or when the same holds for
# the target side; false otherwise. An empty or nil side never matches
# (previously it raised NoMethodError on nil).
def ignore(rule)
  nonterminal = /\[X,\d\]/
  [rule.f, rule.e].any? do |side|
    tokens = Array(side)
    !tokens.empty? &&
      !(tokens.first =~ nonterminal).nil? &&
      !(tokens.last =~ nonterminal).nil?
  end
end
+
+
# Reads all rules from an open stream into an array.
#
# f   - readable IO-like object producing lines of the form "id<TAB>rule".
#       It is closed before this method returns, also when an error occurs.
# fn  - name used only in the status messages written to STDERR.
# ids - optional Hash; when given, each kept rule's rule_id_string is
#       stored as a key with the value true.
#
# Blank lines are skipped. Rules for which ignore(rule) is true are dropped.
#
# Returns the Array of kept Rule objects.
# Raises ArgumentError for a non-blank line without a tab-separated rule.
def read_rules_from_file(f, fn, ids=nil)
  STDERR.puts "reporter:status:reading rules from #{fn}"
  rules = []
  while (line = f.gets)
    next if line.strip.empty?

    id, data = line.split("\t")
    raise ArgumentError, "malformed rule line (expected id<TAB>rule): #{line.inspect}" if data.nil?

    r = Rule.new(data, id.to_i)
    next if ignore(r)

    rules << r
    ids[r.rule_id_string] = true if ids
    # progress report every 10 kept rules
    if (rules.size % 10).zero?
      STDERR.puts "reporter:status:reading rules from #{fn} (already read #{rules.size} lines)"
    end
  end
  rules
ensure
  # always release the stream, even when parsing raised
  f.close unless f.respond_to?(:closed?) && f.closed?
end
+
+
# Streams rules from an open stream, yielding each kept rule to the block.
#
# f   - readable IO-like object producing lines of the form "id<TAB>rule".
#       It is closed when the method exits, also when the block breaks or
#       an error is raised.
# fn  - unused here; part of the signature shared with the other readers.
# ids - optional Hash; when given, each yielded rule's rule_id_string is
#       stored as a key with the value true (after the block has run).
#
# Blank lines are skipped. Rules for which ignore(rule) is true are dropped.
#
# Yields each kept Rule. Returns nil.
# Raises ArgumentError for a non-blank line without a tab-separated rule.
def read_rules_from_file1(f, fn, ids=nil)
  while (line = f.gets)
    next if line.strip.empty?

    id, data = line.split("\t")
    raise ArgumentError, "malformed rule line (expected id<TAB>rule): #{line.inspect}" if data.nil?

    r = Rule.new(data, id.to_i)
    next if ignore(r)

    yield r
    ids[r.rule_id_string] = true if ids
  end
ensure
  # always release the stream, even on break/raise from the block
  f.close unless f.respond_to?(:closed?) && f.closed?
end
+
+
# Streams keyed rules from an open stream.
#
# f   - readable IO-like object producing lines of the form
#       "word_pair_key<TAB>id ||| rule". It is closed when the method
#       exits, also when the block breaks or an error is raised.
# fn  - unused here; part of the signature shared with the other readers.
# ids - optional Hash; when given, each yielded rule's rule_id_string is
#       stored as a key with the value true (after the block has run).
#
# Blank lines are skipped. Rules for which ignore(rule) is true are dropped.
#
# Yields the word-pair key and the parsed Rule. Returns nil.
# Raises ArgumentError for a non-blank line that lacks the tab-separated
# payload or the ' ||| ' between id and rule.
def read_rules_from_file2(f, fn, ids=nil)
  while (line = f.gets)
    next if line.strip.empty?

    word_pair_key, data = line.split("\t")
    id, rule_str = data.split(' ||| ', 2) if data
    if rule_str.nil?
      raise ArgumentError, "malformed rule line (expected key<TAB>id ||| rule): #{line.inspect}"
    end

    r = Rule.new(rule_str, id.to_i)
    next if ignore(r) # prevent overhead later on

    yield word_pair_key, r
    ids[r.rule_id_string] = true if ids
  end
ensure
  # always release the stream, even on break/raise from the block
  f.close unless f.respond_to?(:closed?) && f.closed?
end
+