# overlapping_rules/util.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

# A single grammar rule parsed from a string whose fields are separated by
# ' ||| ':
#
#   NT ||| source tokens ||| target tokens ||| name=value ... ||| alignment
class Rule
  attr_accessor :nt, :f, :e, :features, :alignment, :id

  # Matches a non-terminal placeholder token such as "[X,1]".
  NONTERMINAL = /\[X,\d\]/

  # s  - the rule string, or nil to build an empty rule whose attributes are
  #      filled in later through the accessors.
  # id - identifier stored on the rule (defaults to -1).
  def initialize(s=nil, id=-1)
    # Store the id first so it is kept even when no rule string is given.
    @id = id
    return unless s
    fields = s.strip.split ' ||| '
    @nt = fields[0].strip
    @f = fields[1].split.map(&:strip)
    @e = fields[2].split.map(&:strip)
    @features = {}
    fields[3].split.each do |pair|
      name, value = pair.split '='
      @features[name] = value.to_f
    end
    # The alignment field may be missing; fall back to an empty string
    # instead of failing on nil.
    @alignment = (fields[4] || '').strip
  end

  # Serializes the rule back into the ' ||| '-separated form. Feature values
  # are printed as floats. Unset token lists or features render as empty
  # fields, so a partially built rule can still be printed.
  def to_s
    feature_string = (@features || {}).map { |name, value| "#{name}=#{value}" }.join ' '
    "#{@nt} ||| #{Array(@f).join ' '} ||| #{Array(@e).join ' '} ||| #{feature_string} ||| #{@alignment}"
  end

  # Key built from the source and target tokens, each side joined with '_'.
  def rule_id_string
    "#{@f.join '_'}|||#{@e.join '_'}"
  end

  # Returns the distinct source/target word pairs of this rule, skipping
  # non-terminal tokens. The two words of each pair are sorted before being
  # joined with '|||', so a pair's key does not depend on which side a word
  # came from.
  def fe_word_pairs
    source_words = @f.reject { |token| token =~ NONTERMINAL }
    target_words = @e.reject { |token| token =~ NONTERMINAL }
    pairs = source_words.product(target_words).map { |pair| pair.sort.join '|||' }
    pairs.uniq # we do not want duplicates
  end
end


# Mutable pair of endpoints, +from+ and +to+, both nil until assigned.
#
# NOTE(review): when this file is loaded at top level, this reopens Ruby's
# built-in Range class and replaces its initialize and to_s. Confirm that
# this is intended before relying on core Range behaviour in the same
# process.
class Range
  attr_accessor :from, :to

  def initialize
    @from = @to = nil
  end

  # Renders the endpoints as "from--to".
  def to_s
    "#{@from}--#{@to}"
  end

  # Replaces the endpoints with n - to and n - from (computed from the
  # values held before the call) and returns the new +to+.
  def correct(n)
    new_from = n - @to
    new_to = n - @from
    @from = new_from
    @to = new_to
  end
end


# Returns true when the source side (rule.f) or the target side (rule.e)
# both starts and ends with a non-terminal token of the form "[X,<digit>]";
# returns false otherwise.
def ignore(rule)
  nonterminal = '\[X,\d\]'
  [rule.f, rule.e].any? do |side|
    side.first.match(nonterminal) && side.last.match(nonterminal)
  end
end


# Reads every line of +f+ as "<id>\t<rule string>", builds a Rule from it
# and returns the rules that are not rejected by ignore().
#
# f   - open IO-like object; read with gets and closed before returning,
#       including when parsing raises.
# fn  - name used only in the status messages written to STDERR.
# ids - optional hash; each kept rule's rule_id_string is set to true in it.
#
# Returns an Array of Rule objects.
def read_rules_from_file(f, fn, ids=nil)
  STDERR.puts "reporter:status:reading rules from #{fn}"
  rules = []
  while line = f.gets
    id, data = line.split "\t"
    r = Rule.new(data, id.to_i)
    next if ignore(r)
    rules << r
    ids[r.rule_id_string]=true if ids
    # Progress is reported per kept rule (ignored rules are not counted).
    kept = rules.size
    STDERR.puts "reporter:status:reading rules from #{fn} (already read #{kept} lines)" if (kept % 10).zero?
  end
  rules
ensure
  # Close the handle even if a malformed line raises while parsing.
  f.close
end


# Streaming reader: reads every line of +f+ as "<id>\t<rule string>",
# builds a Rule from it and yields each rule that is not rejected by
# ignore().
#
# f   - open IO-like object; read with gets and closed before returning,
#       including when the block raises or breaks out early.
# fn  - unused here; kept so the signature stays the same for callers.
# ids - optional hash; after a rule has been yielded, its rule_id_string is
#       set to true in it.
def read_rules_from_file1(f, fn, ids=nil)
  while line = f.gets
    id, data = line.split "\t"
    r = Rule.new(data, id.to_i)
    next if ignore(r)
    yield r
    ids[r.rule_id_string]=true if ids
  end
ensure
  # Close the handle on every exit path, not only after a full read.
  f.close
end


# Streaming reader for lines of the form
# "<word pair key>\t<id> ||| <rule string>". Builds a Rule from each line
# and yields the key together with each rule that is not rejected by
# ignore().
#
# f   - open IO-like object; read with gets and closed before returning,
#       including when the block raises or breaks out early.
# fn  - unused here; kept so the signature stays the same for callers.
# ids - optional hash; after a rule has been yielded, its rule_id_string is
#       set to true in it.
def read_rules_from_file2(f, fn, ids=nil)
  while line = f.gets
    word_pair_key, data = line.split "\t"
    # Only the first ' ||| ' separates the id; the rest is the rule itself.
    id, rule_str = data.split " ||| ", 2
    r = Rule.new(rule_str, id.to_i)
    next if ignore(r) # prevent overhead later on
    yield word_pair_key, r
    ids[r.rule_id_string]=true if ids
  end
ensure
  # Close the handle on every exit path, not only after a full read.
  f.close
end