From 68acbb9a0c7967cb90a7e3756fc94fdd8a73d154 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Wed, 29 Jan 2014 19:14:08 +0100
Subject: make use of nlp_ruby, LICENSE
---
min_max | 42 ++++++++++++++++++------------------------
1 file changed, 18 insertions(+), 24 deletions(-)
(limited to 'min_max')
diff --git a/min_max b/min_max
index f27de88..653cde3 100755
--- a/min_max
+++ b/min_max
@@ -1,33 +1,26 @@
#!/usr/bin/ruby
+require 'nlp_ruby'
require 'trollop'
-STDIN.set_encoding 'utf-8'
-STDOUT.set_encoding 'utf-8'
-
-def usage
- puts "filter-min-max.rb --min --max --in_f --in_e --out_f --out_e --out_id "
-end
-usage if ARGV.size!=14
-
-opts = Trollop::options do
+cfg = Trollop::options do
opt :min, "minimum #tokens", :type => :int, :default => 1
- opt :max, "maximum #tokens", :type => :int, :default => 80
- opt :in_f "input 'French' file", :type => string
- opt :in_e "input 'English' file", :type => string
- opt :out_f "output 'French' file", :type => string
- opt :out_e "output 'English' file", :type => string
- opt :out_id "output line Nos", :type => string
+ opt :max, "maximum #tokens", :type => :int, :default => 80, :short => '-n'
+ opt :in_f, "input 'French' file", :type => :string, :required => true
+ opt :in_e, "input 'English' file", :type => :string, :required => true
+ opt :out_f, "output 'French' file", :type => :string, :required => true
+ opt :out_e, "output 'English' file", :type => :string, :required => true
+ opt :out_id, "output line Nos", :type => :string, :required => true
end
files = {}
-files[:f_file] = File.new opts[:in_f], 'r:UTF-8'
-files[:e_file] = File.new opts[:in_e], 'r:UTF-8'
-files[:f_out_file] = File.new opts[:out_f], 'w:UTF-8'
-files[:e_out_file] = File.new opts[:out_e], 'w:UTF-8'
-files[:id_out_file] = File.new opts[:out_id], 'w'
+files[:f_file] = ReadFile.new cfg[:in_f]
+files[:e_file] = ReadFile.new cfg[:in_e]
+files[:f_out_file] = WriteFile.new cfg[:out_f]
+files[:e_out_file] = WriteFile.new cfg[:out_e]
+files[:id_out_file] = WriteFile.new cfg[:out_id]
i = 0
while f_line = files[:f_file].gets
e_line = files[:e_file].gets
@@ -35,13 +28,14 @@ while f_line = files[:f_file].gets
e_line.strip!
a = f_line.split
b = e_line.split
- if a.size >= opts[:min] and a.size <= opts[:max] and \
- b.size >= opts[:min] and b.size <= opts[:max]
+ if a.size >= cfg[:min] and a.size <= cfg[:max] and \
+ b.size >= cfg[:min] and b.size <= cfg[:max]
files[:f_out_file].write "#{f_line}\n"
files[:e_out_file].write "#{e_line}\n"
files[:id_out_file].write "#{i}\n"
- end
+ end
i+=1
end
-files.values.each{|f|f.close}
+
+files.values.each{ |f| f.close }
--
cgit v1.2.3