#!/usr/bin/env ruby

require 'zipf'
require 'trollop'

# Filters two line-aligned input files by token count and length ratio.
#
# Both files are read in lockstep, one line from each per iteration. A pair
# of lines is copied to the outputs (input name + output suffix) only when:
#   * both lines have between --min-len and --max-len whitespace-separated
#     tokens (inclusive), and
#   * tokens(a) / tokens(b) lies within
#     [ratio_avg - ratio_std, ratio_avg + ratio_std] (inclusive).
#
# ReadFile / WriteFile are not defined in this file; presumably they are
# provided by the zipf gem -- verify against the gem version in use.
#
# Exits with a usage error if fewer than two input names are given, and
# aborts with a message on STDERR if the second file has fewer lines than
# the first. Extra lines at the end of the second file are ignored.
def main
  conf = Trollop::options do
    opt :inputs, "inputs, comma separated", :type => :string, :short => "-i", :required => true
    opt :output_suffix, "output suffix", :type => :string, :default => ".out", :short => "-S"
    opt :min_len, "minimum length", :type => :int, :default => 1, :short => "-m"
    opt :max_len, "maximum length", :type => :int, :default => 1000, :short => "-M"
    opt :ratio_avg, "length ratio average", :type => :float, :required => true, :short => "-A"
    opt :ratio_std, "length ratio standard deviation", :type => :float, :required => true, :short => "-T"
  end

  fna, fnb = conf[:inputs].split ','
  # Without this check a missing second name only surfaces later as a
  # NoMethodError on nil when the output file name is built.
  if [fna, fnb].any? { |fn| fn.nil? || fn.empty? }
    Trollop::die :inputs, "needs two comma separated file names"
  end

  min_len = conf[:min_len]
  max_len = conf[:max_len]
  ratio_lower = conf[:ratio_avg] - conf[:ratio_std]
  ratio_upper = conf[:ratio_avg] + conf[:ratio_std]

  a = b = a_out = b_out = nil
  begin
    a = ReadFile.new fna
    b = ReadFile.new fnb
    a_out = WriteFile.new fna + conf[:output_suffix]
    b_out = WriteFile.new fnb + conf[:output_suffix]

    while (linea = a.gets)
      lineb = b.gets
      # The inputs must be line-aligned; a short second file is a data
      # error, so fail loudly instead of crashing on nil.
      abort "#{fnb} has fewer lines than #{fna}" if lineb.nil?

      # split without arguments already ignores leading/trailing whitespace.
      sza = linea.split.size
      szb = lineb.split.size
      next unless sza.between?(min_len, max_len) && szb.between?(min_len, max_len)

      # Float division: an empty line in b yields Infinity/NaN rather than
      # raising. Explicit comparisons are used because NaN simply makes
      # both of them false.
      ratio = sza.to_f / szb.to_f
      next unless ratio >= ratio_lower && ratio <= ratio_upper

      a_out.write linea
      b_out.write lineb
    end
  ensure
    # Close whatever was opened, even on error or abort, so that output
    # written so far is not lost.
    [a, b, a_out, b_out].compact.each { |f| f.close }
  end
end

main