1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#!/usr/bin/env ruby
require 'zipf'
require 'trollop'
# Filters two line-aligned text files by token count and length ratio.
#
# Options are parsed from the command line with Trollop (see the `opt`
# declarations below). The two inputs are read in lockstep, one line from each
# per iteration, and every surviving pair is written to "<input><suffix>".
#
# A pair is kept when either
#   * both lines have at most :ignore_below whitespace-separated tokens
#     (such short pairs bypass every other check), or
#   * both token counts lie within [:min_len, :max_len] and the ratio
#     tokens(a) / tokens(b) lies within :ratio_mean +/- :ratio_stddev.
#
# Raises ArgumentError when :inputs does not name two files, and RuntimeError
# when the second input runs out of lines before the first. All files that
# were opened are closed even if an error is raised.
def main
  conf = Trollop::options do
    opt :inputs, "inputs, comma separated", :type => :string, :short => "-i", :required => true
    opt :output_suffix, "output suffix", :type => :string, :default => ".out", :short => "-S"
    opt :min_len, "minimum length", :type => :int, :default => 1, :short => "-m"
    opt :max_len, "maximum length", :type => :int, :default => 1000, :short => "-M"
    opt :ignore_below, "minimum length to apply ratio test", :type => :int, :default => 7, :short => "-I"
    opt :ratio_mean, "length ratio average", :type => :float, :required => true, :short => "-A"
    opt :ratio_stddev, "length ratio standard deviation", :type => :float, :required => true, :short => "-T"
  end

  fna, fnb = conf[:inputs].split(',')
  # Fail early with a readable message instead of a nil error further down.
  raise ArgumentError, "expected two comma separated inputs, got '#{conf[:inputs]}'" unless fna && fnb

  a = ReadFile.new(fna)
  b = ReadFile.new(fnb)
  a_out = WriteFile.new(fna + conf[:output_suffix])
  b_out = WriteFile.new(fnb + conf[:output_suffix])

  ratio_lower = conf[:ratio_mean] - conf[:ratio_stddev]
  ratio_upper = conf[:ratio_mean] + conf[:ratio_stddev]

  lineno = 0
  while (linea = a.gets)
    lineno += 1
    lineb = b.gets
    # The inputs must be line-aligned; a missing partner line means the data
    # is out of sync, so stop with an explicit error.
    raise "#{fnb} has fewer lines than #{fna} (no line #{lineno})" if lineb.nil?

    sza = linea.strip.split.size
    szb = lineb.strip.split.size
    # Float division: an empty second line yields Infinity or NaN, both of
    # which simply fail the explicit range comparison below.
    ratio = sza.to_f / szb.to_f

    short_pair = sza <= conf[:ignore_below] && szb <= conf[:ignore_below]
    lengths_ok = sza >= conf[:min_len] && szb >= conf[:min_len] &&
                 sza <= conf[:max_len] && szb <= conf[:max_len]
    ratio_ok = ratio >= ratio_lower && ratio <= ratio_upper

    next unless short_pair || (lengths_ok && ratio_ok)

    a_out.write linea
    b_out.write lineb
  end

  # Extra lines in the second input are not processed; make that visible.
  warn "warning: #{fnb} has more lines than #{fna}; extra lines ignored" if b.gets
ensure
  # Locals that were never assigned are nil here; close only what was opened.
  [a, b, a_out, b_out].compact.each(&:close)
end
# Script entry point: main is invoked unconditionally whenever this file is loaded.
main
|