1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
# One grammar rule, parsed from a line of the form
#   nt ||| f tokens ||| e tokens ||| name=value ... ||| alignment
# and printable back into that form through #to_s.
class Rule
  attr_accessor :nt, :f, :e, :features, :alignment, :id

  # Builds a rule from the string +s+ and tags it with +id+.
  # When +s+ is nil/false nothing is assigned at all, so every attribute
  # (+id+ included) stays nil.
  def initialize(s=nil, id=-1)
    return unless s

    @id = id
    nt_field, f_field, e_field, feature_field, alignment_field = s.strip.split(' ||| ')
    @nt = nt_field.strip
    @f = f_field.split.map(&:strip)
    @e = e_field.split.map(&:strip)
    @features = {}
    feature_field.split.each do |pair|
      name, value = pair.split('=')
      # Values are stored as floats; a pair without '=' yields nil.to_f == 0.0.
      @features[name] = value.to_f
    end
    @alignment = alignment_field.strip
  end

  # Serialises the rule back into the ' ||| '-separated line format.
  # A missing feature hash is rendered as an empty feature field.
  def to_s
    pairs = @features ? @features.map { |name, value| "#{name}=#{value}" } : []
    "#{@nt} ||| #{f.join(' ')} ||| #{@e.join(' ')} ||| #{pairs.join(' ')} ||| #{@alignment}"
  end

  # Key built from the f and e tokens: each side joined with '_', the two
  # sides separated by '|||'.
  def rule_id_string
    f_key = @f.join('_')
    e_key = @e.join('_')
    "#{f_key}|||#{e_key}"
  end

  # Every pairing of an f token with an e token, skipping tokens that contain
  # a "[X,<digit>]" nonterminal. The two words of a pair are sorted before
  # being joined with '|||', and duplicate pairs are dropped.
  def fe_word_pairs
    nonterminal = '\[X,\d\]'
    f_words = @f.reject { |token| token.match(nonterminal) }
    pairs = f_words.flat_map do |f_word|
      e_words = @e.reject { |token| token.match(nonterminal) }
      e_words.map { |e_word| [f_word, e_word].sort.join('|||') }
    end
    pairs.uniq # we do not want duplicates
  end
end
# A mutable from/to span.
# NOTE(review): this reopens Ruby's built-in Range class, replacing its
# #initialize and #to_s for the whole process -- confirm that is intended.
class Range
  attr_accessor :from, :to

  # Starts with both endpoints unset.
  def initialize
    @from = @to = nil
  end

  # Renders the span as "from--to".
  def to_s
    "#{@from}--#{@to}"
  end

  # Mirrors the span against +n+: the new from is n - old to, and the new to
  # is n - old from. Returns the new +to+.
  def correct(n)
    @from, @to = n - @to, n - @from
    @to
  end
end
# Decides whether +rule+ should be skipped.
# Returns true when the first and last tokens of rule.f both contain a
# "[X,<digit>]" nonterminal, or when the first and last tokens of rule.e both
# do; returns false otherwise.
def ignore(rule)
  nonterminal = '\[X,\d\]'
  nonterminal_at_both_ends = lambda do |tokens|
    tokens.first.match?(nonterminal) && tokens.last.match?(nonterminal)
  end
  nonterminal_at_both_ends.call(rule.f) || nonterminal_at_both_ends.call(rule.e)
end
# Reads all rules from +f+ into an array, then closes +f+.
#
# Each line is split on tabs into an id (converted with to_i) and the rule
# text, which is handed to Rule.new. Rules for which ignore(rule) is true are
# dropped. Status lines are written to STDERR at the start and after every
# tenth kept rule (the message says "lines", but ignored rules are not
# counted).
#
# @param f   [#gets, #close] open input stream; it is always closed before
#   this method returns, including when a line fails to parse
# @param fn  [String] name shown in the status messages
# @param ids [Hash, nil] when given, ids[rule.rule_id_string] = true is set
#   for every kept rule
# @return [Array<Rule>] the kept rules in input order
def read_rules_from_file(f, fn, ids=nil)
  STDERR.puts "reporter:status:reading rules from #{fn}"
  rules = []
  while (line = f.gets)
    id, data = line.split "\t"
    rule = Rule.new(data, id.to_i)
    next if ignore(rule)

    rules << rule
    ids[rule.rule_id_string] = true if ids
    if (rules.size % 10).zero?
      STDERR.puts "reporter:status:reading rules from #{fn} (already read #{rules.size} lines)"
    end
  end
  rules
ensure
  # Close the stream even when parsing raises, so the handle is not leaked.
  f.close
end
# Streams rules from +f+, yielding each kept rule to the caller's block, then
# closes +f+.
#
# Each line is split on tabs into an id (converted with to_i) and the rule
# text, which is handed to Rule.new. Rules for which ignore(rule) is true are
# not yielded.
#
# @param f   [#gets, #close] open input stream; it is always closed when this
#   method exits, including when the block raises or breaks out early
# @param fn  [String] unused; kept so the signature matches read_rules_from_file
# @param ids [Hash, nil] when given, ids[rule.rule_id_string] = true is set
#   after each kept rule has been yielded
# @yieldparam rule [Rule] a kept rule
# @return [nil]
def read_rules_from_file1(f, fn, ids=nil)
  while (line = f.gets)
    id, data = line.split "\t"
    rule = Rule.new(data, id.to_i)
    next if ignore(rule)

    yield rule
    ids[rule.rule_id_string] = true if ids
  end
ensure
  # Close the stream on every exit path so the handle is not leaked.
  f.close
end
# Streams keyed rules from +f+, yielding each key together with its kept rule,
# then closes +f+.
#
# Each line is split on tabs into a word-pair key and a data field; the data
# field is split once on ' ||| ' into an id (converted with to_i) and the rule
# text, which is handed to Rule.new. Rules for which ignore(rule) is true are
# not yielded.
#
# @param f   [#gets, #close] open input stream; it is always closed when this
#   method exits, including when the block raises or breaks out early
# @param fn  [String] unused; kept so the signature matches read_rules_from_file
# @param ids [Hash, nil] when given, ids[rule.rule_id_string] = true is set
#   after each kept rule has been yielded
# @yieldparam word_pair_key [String] text before the first tab
# @yieldparam rule [Rule] a kept rule
# @return [nil]
def read_rules_from_file2(f, fn, ids=nil)
  while (line = f.gets)
    word_pair_key, data = line.split "\t"
    id, rule_str = data.split " ||| ", 2
    rule = Rule.new(rule_str, id.to_i)
    next if ignore(rule) # prevent overhead later on

    yield word_pair_key, rule
    ids[rule.rule_id_string] = true if ids
  end
ensure
  # Close the stream on every exit path so the handle is not leaked.
  f.close
end
|