From db6a6ecfa350cae29739c59df1210d8f76a479c9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Thu, 5 Dec 2013 07:56:38 +0100
Subject: init
---
min_max | 47 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 47 insertions(+)
create mode 100755 min_max
(limited to 'min_max')
diff --git a/min_max b/min_max
new file mode 100755
index 0000000..f27de88
--- /dev/null
+++ b/min_max
@@ -0,0 +1,47 @@
+#!/usr/bin/ruby
+
+require 'trollop'
+
+# Switch standard input and standard output to UTF-8.
+[STDIN, STDOUT].each { |stream| stream.set_encoding 'utf-8' }
+
+# Print a one-line hint listing the expected flags to stdout; it does not exit.
+def usage # NOTE(review): Trollop may expose :in_f as --in-f -- confirm the flag spelling
+  $stdout.puts "filter-min-max.rb --min --max --in_f --in_e --out_f --out_e --out_id "
+end
+usage if ARGV.size != 14 # advisory only: prints the hint and carries on (--min/--max have defaults)
+# Parse the flags; Trollop reports a missing required path option and exits.
+opts = Trollop::options do
+  opt :min, "minimum #tokens", :type => :int, :default => 1
+  opt :max, "maximum #tokens", :type => :int, :default => 80
+  opt :in_f, "input 'French' file", :type => :string, :required => true
+  opt :in_e, "input 'English' file", :type => :string, :required => true
+  opt :out_f, "output 'French' file", :type => :string, :required => true
+  opt :out_e, "output 'English' file", :type => :string, :required => true
+  opt :out_id, "output line Nos", :type => :string, :required => true
+end
+# Read both inputs in lockstep; keep a pair only when BOTH sides have between
+# opts[:min] and opts[:max] whitespace-separated tokens (inclusive).
+files = {}
+begin
+  files[:f_file] = File.new opts[:in_f], 'r:UTF-8'
+  files[:e_file] = File.new opts[:in_e], 'r:UTF-8'
+  files[:f_out_file] = File.new opts[:out_f], 'w:UTF-8'
+  files[:e_out_file] = File.new opts[:out_e], 'w:UTF-8'
+  files[:id_out_file] = File.new opts[:out_id], 'w'
+  limits = opts[:min]..opts[:max]
+  files[:f_file].each_line.with_index do |f_line, i|
+    e_line = files[:e_file].gets
+    # A shorter 'English' input would otherwise fail on nil.strip; stop with a clear message.
+    abort "#{opts[:in_e]} has fewer lines than #{opts[:in_f]} (stopped at pair #{i})" if e_line.nil?
+    f_line = f_line.strip
+    e_line = e_line.strip
+    next unless limits.cover?(f_line.split.size) && limits.cover?(e_line.split.size)
+    files[:f_out_file].write "#{f_line}\n"
+    files[:e_out_file].write "#{e_line}\n"
+    files[:id_out_file].write "#{i}\n" # 0-based index of the kept pair in the input
+  end
+ensure
+  files.each_value(&:close) # also runs when an error or abort interrupts the loop
+end
+
--
cgit v1.2.3