# One grammar rule parsed from its textual form
#
#   nt ||| f tokens ||| e tokens ||| name=value ... ||| alignment
#
# `f` and `e` are whitespace-separated token lists (presumably the source and
# target sides of a translation rule -- confirm against the producer of these
# files). `features` maps feature names to Floats.
class Rule
  # Matches a non-terminal placeholder such as "[X,1]" anywhere in a token.
  # NOTE(review): only a single digit is accepted, so "[X,10]" is not
  # recognised; kept as in the original pattern.
  NONTERMINAL = /\[X,\d\]/

  attr_accessor :nt, :f, :e, :features, :alignment, :id

  # Parses +s+ into a rule. With no string the rule is left blank (every
  # attribute nil) so callers can fill it in through the accessors.
  #
  # @param s [String, nil] rule text; fields are separated by ' ||| '
  # @param id [Integer] identifier stored on the rule
  # @raise [ArgumentError] if +s+ has fewer than three fields (nt, f, e)
  def initialize(s=nil, id=-1)
    return unless s

    fields = s.strip.split(' ||| ')
    if fields.size < 3
      raise ArgumentError,
            "malformed rule (expected at least 3 ' ||| '-separated fields): #{s.inspect}"
    end

    @id = id
    @nt = fields[0].strip
    # String#split without arguments already discards surrounding whitespace.
    @f = fields[1].split
    @e = fields[2].split
    @features = {}
    # The features and alignment fields are optional; a missing one is
    # treated as empty instead of failing on nil.
    (fields[3] || '').split.each do |pair|
      name, value = pair.split('=')
      @features[name] = value.to_f
    end
    @alignment = (fields[4] || '').strip
  end

  # Serialises the rule back into the ' ||| '-separated text form.
  # Feature values are printed as Floats (e.g. "f=1.0"). A blank rule
  # yields empty fields rather than raising.
  #
  # @return [String]
  def to_s
    feature_string = (@features || {}).map { |name, value| "#{name}=#{value}" }.join(' ')
    "#{@nt} ||| #{Array(@f).join(' ')} ||| #{Array(@e).join(' ')} ||| #{feature_string} ||| #{@alignment}"
  end

  # Key built from both token lists: tokens joined by '_', sides joined by '|||'.
  #
  # @return [String]
  def rule_id_string
    "#{@f.join('_')}|||#{@e.join('_')}"
  end

  # Every pairing of a terminal token from +f+ with a terminal token from +e+.
  # Each pair is sorted before joining, so the order of the two words inside
  # a key does not depend on which side they came from.
  #
  # @return [Array<String>] unique "w1|||w2" keys, in first-seen order
  def fe_word_pairs
    f_words = @f.reject { |token| token =~ NONTERMINAL }
    e_words = @e.reject { |token| token =~ NONTERMINAL }
    pairs = f_words.product(e_words).map { |pair| pair.sort.join('|||') }
    pairs.uniq # we do not want duplicates
  end
end

# A mutable from/to pair.
#
# NOTE(review): at top level this reopens Ruby's built-in Range class. It
# replaces the core constructor with a zero-argument one (so Range.new(a, b)
# stops working) and overrides Range#to_s for every range in the process.
# Left unchanged because callers may rely on the name; consider renaming.
class Range
  attr_accessor :from, :to

  def initialize
    @from = nil
    @to = nil
  end

  # @return [String] "from--to"
  def to_s
    "#{@from}--#{@to}"
  end

  # Mirrors the pair around +n+: the new from is n - old to, and the new to
  # is n - old from.
  def correct(n)
    @from, @to = n - @to, n - @from
  end
end

# Decides whether a rule should be skipped: true when, on either side
# (+f+ or +e+), both the first and the last token are non-terminals.
# A side with no tokens never triggers the skip.
#
# @param rule [Rule]
# @return [Boolean]
def ignore(rule)
  [rule.f, rule.e].any? do |tokens|
    !tokens.empty? &&
      tokens.first =~ Rule::NONTERMINAL &&
      tokens.last =~ Rule::NONTERMINAL
  end
end

# Reads every "id<TAB>rule" line from +f+ and returns the rules that are not
# filtered out by #ignore. Progress is written to STDERR in the
# "reporter:status:" format (presumably for a Hadoop-streaming style
# reporter -- confirm). +f+ is closed before returning, also on error.
#
# @param f [#gets, #close] open input stream
# @param fn [String] name used only in the status messages
# @param ids [Hash, nil] when given, each kept rule's rule_id_string is set to true
# @return [Array<Rule>]
def read_rules_from_file(f, fn, ids=nil)
  STDERR.puts "reporter:status:reading rules from #{fn}"
  rules = []
  kept = 0
  while (line = f.gets)
    id, data = line.split("\t")
    rule = Rule.new(data, id.to_i)
    next if ignore(rule)

    rules << rule
    ids[rule.rule_id_string] = true if ids
    kept += 1
    if (kept % 10).zero?
      STDERR.puts "reporter:status:reading rules from #{fn} (already read #{kept} lines)"
    end
  end
  rules
ensure
  f.close unless f.closed?
end

# Streaming variant of read_rules_from_file: yields each kept rule to the
# block instead of collecting them. +f+ is closed when reading ends, also if
# the block raises or breaks.
#
# @param f [#gets, #close] open input stream of "id<TAB>rule" lines
# @param fn [String] unused; kept for signature compatibility
# @param ids [Hash, nil] when given, each yielded rule's rule_id_string is set to true
# @yieldparam rule [Rule]
def read_rules_from_file1(f, fn, ids=nil)
  while (line = f.gets)
    id, data = line.split("\t")
    rule = Rule.new(data, id.to_i)
    next if ignore(rule)

    yield rule
    ids[rule.rule_id_string] = true if ids
  end
ensure
  f.close unless f.closed?
end

# Streaming reader for lines of the form "key<TAB>id ||| rule". Yields the
# key together with the parsed rule. +f+ is closed when reading ends, also if
# the block raises or breaks.
#
# @param f [#gets, #close] open input stream
# @param fn [String] unused; kept for signature compatibility
# @param ids [Hash, nil] when given, each yielded rule's rule_id_string is set to true
# @yieldparam word_pair_key [String] text before the tab
# @yieldparam rule [Rule]
def read_rules_from_file2(f, fn, ids=nil)
  while (line = f.gets)
    word_pair_key, data = line.split("\t")
    id, rule_str = data.split(' ||| ', 2)
    rule = Rule.new(rule_str, id.to_i)
    next if ignore(rule) # prevent overhead later on

    yield word_pair_key, rule
    ids[rule.rule_id_string] = true if ids
  end
ensure
  f.close unless f.closed?
end