#!/usr/bin/ruby
#
# Filters a line-aligned pair of text files ("French" and "English") by
# whitespace token count. A line pair is kept only when BOTH lines have
# between --min and --max tokens (inclusive). Kept lines are written, stripped
# of surrounding whitespace, to the two output files; the 0-based index of
# each kept pair in the input is written to the id file, one index per line.

require 'trollop'

STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

# Prints the one-line command synopsis to standard output.
def usage
  puts "filter-min-max.rb --min --max --in_f --in_e --out_f --out_e --out_id "
end

# Returns true when +line+ has between +min+ and +max+ whitespace-separated
# tokens, inclusive.
def token_count_ok?(line, min, max)
  line.split.size.between?(min, max)
end

# The script expects all seven options, each followed by a separate value
# (7 flags + 7 values = 14 arguments). Stop here instead of continuing with
# missing file names.
if ARGV.size != 14
  usage
  exit 1
end

opts = Trollop::options do
  opt :min, "minimum #tokens", :type => :int, :default => 1
  opt :max, "maximum #tokens", :type => :int, :default => 80
  opt :in_f, "input 'French' file", :type => :string
  opt :in_e, "input 'English' file", :type => :string
  opt :out_f, "output 'French' file", :type => :string
  opt :out_e, "output 'English' file", :type => :string
  opt :out_id, "output line Nos", :type => :string
end

files = {}
begin
  files[:f_file] = File.new opts[:in_f], 'r:UTF-8'
  files[:e_file] = File.new opts[:in_e], 'r:UTF-8'
  files[:f_out_file] = File.new opts[:out_f], 'w:UTF-8'
  files[:e_out_file] = File.new opts[:out_e], 'w:UTF-8'
  files[:id_out_file] = File.new opts[:out_id], 'w'

  # The French file drives the iteration; one English line is read per
  # French line so the two stay aligned.
  files[:f_file].each_line.with_index do |f_raw, i|
    e_raw = files[:e_file].gets
    # A missing English line means the inputs are not line-aligned; report it
    # clearly rather than failing with NoMethodError on nil.
    if e_raw.nil?
      abort "#{opts[:in_e]} has fewer lines than #{opts[:in_f]} (no line at index #{i})"
    end

    f_line = f_raw.strip
    e_line = e_raw.strip
    next unless token_count_ok?(f_line, opts[:min], opts[:max]) &&
                token_count_ok?(e_line, opts[:min], opts[:max])

    files[:f_out_file].write "#{f_line}\n"
    files[:e_out_file].write "#{e_line}\n"
    files[:id_out_file].write "#{i}\n"
  end
ensure
  # Close whatever was opened, including when an error or abort interrupts
  # the run, so buffered output is flushed.
  files.each_value(&:close)
end