init

author: Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer: Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit: 26c490f404731d053a6205719b6246502c07b449 (patch)
tree: 3aa721098f1251dfbf2249ecd2736434c13b1d48 /overlapping_rules/util.rb
1 files changed, 116 insertions, 0 deletions
diff --git a/overlapping_rules/util.rb b/overlapping_rules/util.rb
new file mode 100644
index 0000000..5f1249a
--- /dev/null
+++ b/overlapping_rules/util.rb
@@ -0,0 +1,116 @@
+# One grammar rule in the text format
+#   "NT ||| source tokens ||| target tokens ||| name=value ... ||| alignment"
+# where the five fields are separated by the literal string " ||| ".
+class Rule
+  attr_accessor :nt, :f, :e, :features, :alignment, :id
+
+  # Matches a non-terminal placeholder of the form "[X,<digit>]" anywhere in
+  # a token. Compiled once instead of on every call.
+  NT_PATTERN = /\[X,\d\]/
+
+  # Parses the rule string +s+ (if given) and stores +id+.
+  #
+  # @param s [String, nil] rule line; when nil only +id+ is set and every
+  #   other attribute stays nil
+  # @param id [Integer] identifier stored as-is
+  def initialize(s=nil, id=-1)
+    # Assign the id before the early return so the default (-1) also applies
+    # to rules created without a string.
+    @id = id
+    return if !s
+    a = s.strip.split ' ||| '
+    # Missing trailing fields are treated as empty instead of raising on nil.
+    @nt = a[0].to_s.strip
+    @f = a[1].to_s.split # whitespace split already yields stripped tokens
+    @e = a[2].to_s.split
+    @features = {}
+    a[3].to_s.split.each { |i|
+      name, value = i.split '='
+      @features[name] = value.to_f # a token without "=" yields 0.0
+    }
+    @alignment = a[4].to_s.strip
+  end
+
+  # Serializes the rule back into the " ||| "-separated format.
+  # Unset (nil) fields are rendered as empty strings.
+  #
+  # @return [String]
+  def to_s
+    feature_string = (@features || {}).map { |name, value| "#{name}=#{value}" }.join(' ')
+    "#{@nt} ||| #{Array(@f).join(' ')} ||| #{Array(@e).join(' ')} ||| #{feature_string} ||| #{@alignment}"
+  end
+
+  # Key built from the source and target tokens, each side joined with "_"
+  # and the two sides separated by "|||".
+  #
+  # @return [String]
+  def rule_id_string
+    "#{Array(@f).join('_')}|||#{Array(@e).join('_')}"
+  end
+
+  # All distinct source/target token pairs, skipping tokens that contain a
+  # non-terminal placeholder. The two tokens of a pair are sorted before
+  # being joined with "|||".
+  #
+  # @return [Array<String>]
+  def fe_word_pairs
+    source_words = Array(@f).reject { |w| w =~ NT_PATTERN }
+    target_words = Array(@e).reject { |w| w =~ NT_PATTERN }
+    # product iterates source-major, matching a nested source/target loop
+    source_words.product(target_words).map { |pair| pair.sort.join('|||') }.uniq # we do not want duplicates
+  end
+end
+
+
+# Mutable pair of positions (+from+, +to+), rendered as "from--to".
+#
+# NOTE(review): Range is also the name of Ruby's core range class, so this
+# definition reopens it (adding accessors and replacing initialize/to_s)
+# rather than creating a new class -- confirm this is intended before
+# relying on built-in ranges elsewhere in the program.
+class Range
+  attr_accessor :from, :to
+
+  # Starts with both ends unset.
+  def initialize
+    @from = @to = nil
+  end
+
+  # @return [String] "<from>--<to>"
+  def to_s
+    "#{@from}--#{@to}"
+  end
+
+  # Replaces the pair with (n - to, n - from), computed from the values held
+  # before the call. Returns the new +to+.
+  def correct(n)
+    old_from, old_to = @from, @to
+    @from = n - old_to
+    @to = n - old_from
+  end
+end
+
+
+# Decides whether +rule+ should be skipped: true when, on the source side
+# (+rule.f+) or on the target side (+rule.e+), both the first and the last
+# token contain a non-terminal placeholder of the form "[X,<digit>]".
+#
+# A side that is nil or empty never triggers the condition (previously this
+# raised NoMethodError on nil).
+#
+# @param rule [#f, #e] object exposing the source and target token arrays
+# @return [Boolean]
+def ignore(rule)
+  nt = /\[X,\d\]/
+  [rule.f, rule.e].any? { |side|
+    side && !side.empty? && side.first =~ nt && side.last =~ nt
+  }
+end
+
+
+# Reads tab-separated "<id>\t<rule string>" lines from +f+ and returns the
+# parsed rules, leaving out those for which ignore() is true.
+#
+# Progress is written to STDERR as "reporter:status:" lines. Lines without a
+# tab-separated rule field (e.g. blank lines) are skipped.
+#
+# @param f [#gets, #close] open input stream; always closed before returning,
+#   even if parsing raises
+# @param fn [String] name used in the status messages only
+# @param ids [Hash, nil] if given, ids[rule.rule_id_string] is set to true
+#   for every kept rule
+# @return [Array<Rule>] kept rules in input order
+def read_rules_from_file f, fn, ids=nil
+  STDERR.puts "reporter:status:reading rules from #{fn}"
+  rules = []
+  while (line = f.gets)
+    id, data = line.split "\t"
+    next if data.nil? # nothing to parse on this line
+    r = Rule.new(data, id.to_i)
+    next if ignore(r)
+    rules << r
+    ids[r.rule_id_string] = true if ids
+    # The counter is the number of kept rules, reported every 10th one.
+    STDERR.puts "reporter:status:reading rules from #{fn} (already read #{rules.size} lines)" if (rules.size % 10).zero?
+  end
+  rules
+ensure
+  f.close
+end
+
+
+# Streaming reader: parses tab-separated "<id>\t<rule string>" lines from +f+
+# and yields each rule for which ignore() is false.
+#
+# Lines without a tab-separated rule field (e.g. blank lines) are skipped.
+#
+# @param f [#gets, #close] open input stream; always closed, even if the
+#   block or the parser raises
+# @param fn [String] not used by this method; kept for signature
+#   compatibility
+# @param ids [Hash, nil] if given, ids[rule.rule_id_string] is set to true
+#   after each rule has been yielded
+# @yieldparam r [Rule] the parsed rule
+def read_rules_from_file1 f, fn, ids=nil
+  while (line = f.gets)
+    id, data = line.split "\t"
+    next if data.nil? # nothing to parse on this line
+    r = Rule.new(data, id.to_i)
+    next if ignore(r)
+    yield r
+    ids[r.rule_id_string] = true if ids
+  end
+ensure
+  f.close
+end
+
+
+# Streaming reader for lines of the form
+#   "<word pair key>\t<id> ||| <rule string>"
+# Yields the key together with the parsed rule for every rule for which
+# ignore() is false.
+#
+# Lines lacking the tab-separated payload or the " ||| " after the id are
+# skipped.
+#
+# @param f [#gets, #close] open input stream; always closed, even if the
+#   block or the parser raises
+# @param fn [String] not used by this method; kept for signature
+#   compatibility
+# @param ids [Hash, nil] if given, ids[rule.rule_id_string] is set to true
+#   after each rule has been yielded
+# @yieldparam word_pair_key [String] text before the first tab
+# @yieldparam r [Rule] the parsed rule
+def read_rules_from_file2 f, fn, ids=nil
+  while (line = f.gets)
+    word_pair_key, data = line.split "\t"
+    next if data.nil? # no payload on this line
+    # Limit of 2: only the first " ||| " separates the id from the rule.
+    id, rule_str = data.split " ||| ", 2
+    next if rule_str.nil? # id without a rule
+    r = Rule.new(rule_str, id.to_i)
+    next if ignore(r) # prevent overhead later on
+    yield word_pair_key, r
+    ids[r.rule_id_string] = true if ids
+  end
+ensure
+  f.close
+end
+
author	Patrick Simianer <p@simianer.de>	2014-06-14 16:46:27 +0200
committer	Patrick Simianer <p@simianer.de>	2014-06-14 16:46:27 +0200
commit	26c490f404731d053a6205719b6246502c07b449 (patch)
tree	3aa721098f1251dfbf2249ecd2736434c13b1d48 /overlapping_rules/util.rb