#!/usr/bin/ruby

# min_max -- filter a line-aligned parallel corpus by token count.
#
# Reads a 'French' and an 'English' file in lockstep, one segment per line.
# A pair is kept only when BOTH sides have between --min and --max
# whitespace-separated tokens (inclusive). Kept pairs are written to the two
# output files, and the 0-based index of each kept pair is written to the
# id file.
#
# Provenance: initial commit db6a6ec ("init") by Patrick Simianer, 2013-12-05.

require 'trollop'


STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

# Prints a one-line summary of the command-line flags to STDOUT.
def usage
  puts "filter-min-max.rb --min --max --in_f --in_e --out_f --out_e --out_id "
end

# Returns true when the number of elements in +tokens+ lies in the inclusive
# range [min, max].
def within_limits?(tokens, min, max)
  tokens.size.between?(min, max)
end

# Streams both inputs in lockstep and copies every pair whose two sides pass
# the length filter.
#
# in_f, in_e   - readable IO objects for the two sides of the corpus.
# out_f, out_e - writable IO objects receiving the stripped, kept lines.
# out_id       - writable IO object receiving the 0-based index of each kept
#                pair, one per line.
# min, max     - inclusive token-count bounds applied to both sides.
#
# Aborts (exit status 1) when +in_e+ runs out of lines before +in_f+, because
# the pairing is meaningless from that point on. Extra lines in +in_e+ are
# ignored, but a warning is printed to STDERR.
#
# Returns the number of pairs written.
def filter_parallel(in_f, in_e, out_f, out_e, out_id, min, max)
  kept = 0
  index = 0
  while (f_line = in_f.gets)
    e_line = in_e.gets
    # Previously this fell through to nil.strip! and died with NoMethodError.
    abort "min_max: 'English' input ended at line #{index} before 'French' input" if e_line.nil?

    f_line = f_line.strip
    e_line = e_line.strip
    if within_limits?(f_line.split, min, max) && within_limits?(e_line.split, min, max)
      out_f.write "#{f_line}\n"
      out_e.write "#{e_line}\n"
      out_id.write "#{index}\n"
      kept += 1
    end
    index += 1
  end
  warn "min_max: 'English' input has more lines than 'French' input; extra lines ignored" if in_e.gets
  kept
end

# With no arguments at all there is nothing useful to do: show the flag
# summary and stop instead of failing later on a nil file name.
if ARGV.empty?
  usage
  exit 1
end

# NOTE(review): Trollop may derive the long flag names by turning underscores
# into dashes (i.e. --in-f rather than --in_f); confirm against the installed
# version and align the usage string if so.
opts = Trollop::options do
  opt :min,    "minimum #tokens",       :type => :int, :default => 1
  opt :max,    "maximum #tokens",       :type => :int, :default => 80
  opt :in_f,   "input 'French' file",   :type => :string, :required => true
  opt :in_e,   "input 'English' file",  :type => :string, :required => true
  opt :out_f,  "output 'French' file",  :type => :string, :required => true
  opt :out_e,  "output 'English' file", :type => :string, :required => true
  opt :out_id, "output line Nos",       :type => :string, :required => true
end


files = {}
begin
  files[:f_file]      = File.new opts[:in_f], 'r:UTF-8'
  files[:e_file]      = File.new opts[:in_e], 'r:UTF-8'
  files[:f_out_file]  = File.new opts[:out_f], 'w:UTF-8'
  files[:e_out_file]  = File.new opts[:out_e], 'w:UTF-8'
  files[:id_out_file] = File.new opts[:out_id], 'w'

  filter_parallel(files[:f_file], files[:e_file],
                  files[:f_out_file], files[:e_out_file], files[:id_out_file],
                  opts[:min], opts[:max])
ensure
  # Close whatever was opened, even when a later open or the filter fails.
  files.each_value { |f| f.close }
end