1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
|
#!/usr/bin/ruby
require 'zipf'
require 'trollop'
# Command-line options. :min and :max are integer token-count limits with
# defaults of 1 and 80; the five file paths are all mandatory strings.
cfg = Trollop::options do
  # Token-count limits (:max is given the explicit short flag -n).
  opt(:min, "minimum #tokens", type: :int, default: 1)
  opt(:max, "maximum #tokens", type: :int, default: 80, short: '-n')

  # Input files.
  opt(:in_f, "input 'French' file", type: :string, required: true)
  opt(:in_e, "input 'English' file", type: :string, required: true)

  # Output files: the two filtered texts plus the kept line numbers.
  opt(:out_f, "output 'French' file", type: :string, required: true)
  opt(:out_e, "output 'English' file", type: :string, required: true)
  opt(:out_id, "output line Nos", type: :string, required: true)
end
# Filter a line-aligned pair of files by token count.
#
# The 'French' and 'English' inputs are read in lockstep, one line each per
# iteration. A pair is kept only when BOTH stripped lines have a
# whitespace-separated token count within cfg[:min]..cfg[:max] (inclusive).
# Kept lines are written to the two output files, and the 0-based index of
# the pair in the input is written to the id output file.
#
# If the English input runs out before the French one, the script aborts with
# a descriptive message (exit status 1) instead of failing with a
# NoMethodError on nil. Extra trailing English lines only produce a warning.
# All opened files are closed in every case.
files = {}
begin
  files[:f_file] = ReadFile.new cfg[:in_f]
  files[:e_file] = ReadFile.new cfg[:in_e]
  files[:f_out_file] = WriteFile.new cfg[:out_f]
  files[:e_out_file] = WriteFile.new cfg[:out_e]
  files[:id_out_file] = WriteFile.new cfg[:out_id]

  # True when a token list's length is inside the configured inclusive range.
  length_ok = ->(tokens) { tokens.size.between?(cfg[:min], cfg[:max]) }

  i = 0 # 0-based index of the current line pair
  while (f_line = files[:f_file].gets)
    e_line = files[:e_file].gets
    # Assumes ReadFile#gets returns nil at end of input, as the loop
    # condition above already relies on for the French file.
    if e_line.nil?
      # abort raises SystemExit, so the ensure clause below still closes
      # (and thereby flushes) everything written so far.
      abort "#{cfg[:in_e]} has fewer lines than #{cfg[:in_f]} " \
            "(no counterpart for line #{i}, 0-based)"
    end

    f_line.strip!
    e_line.strip!

    if length_ok.call(f_line.split) && length_ok.call(e_line.split)
      files[:f_out_file].write "#{f_line}\n"
      files[:e_out_file].write "#{e_line}\n"
      files[:id_out_file].write "#{i}\n"
    end
    i += 1
  end

  # The French side is exhausted; leftover English lines mean the inputs
  # were not aligned. They are ignored, but no longer silently.
  if files[:e_file].gets
    warn "#{cfg[:in_e]} has more lines than #{cfg[:in_f]}; " \
         "ignoring everything after line #{i - 1} (0-based)"
  end
ensure
  # Close whatever was successfully opened, even on error or abort.
  files.each_value(&:close)
end
|