1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
|
#!/usr/bin/env ruby
require 'optimist'
# Randomly samples lines from a line-separated input (a file or STDIN), or
# indices from the range 0...N when --N is given, and prints the result.
#
# --size is read as a percentage of the available items unless --absolute is
# set, in which case it is an item count. The sample is printed in original
# order unless --shuffle is given.

STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

opts = Optimist::options do
  banner "sample --size <n> [--shuffle] --file <line separated data>"
  opt :size, "Sample P % or # lines from file or N.", :type => :float
  opt :shuffle, "Sample is shuffled.", :type => :bool
  opt :file, "Input file.", :type => :string, :default => '-'
  opt :output_index, "Output index number.", :type => :bool
  opt :N, "Sample --size from N items.", :type => :int, :default => -1
  opt :absolute, "Sample absolute number of items.", :type => :bool
end

# --size has no default; fail with a usage error instead of crashing later
# on nil arithmetic or inside Array#sample.
Optimist::die :size, "is required" unless opts[:size]
Optimist::die :size, "must be non-negative" if opts[:size] < 0

# An N of -1 (the default) means "sample lines from the input"; any other
# value means "sample bare indices from 0...N" without reading any input.
sampling_lines = opts[:N] == -1

input = []
if sampling_lines
  input = if opts[:file] == '-'
            STDIN.readlines
          else
            # Block form closes the file handle once the lines are read.
            File.open(opts[:file], 'r', &:readlines)
          end
  index = (0...input.size).to_a
else
  index = (0...opts[:N]).to_a
end

# Number of items to draw, truncated toward zero. For percentages, multiply
# before dividing: size * (29 / 100.0) with size 100 evaluates to
# 28.999999999999996 and would truncate to 28 instead of 29.
count = if opts[:absolute]
          opts[:size].to_i
        else
          (index.size * opts[:size] / 100.0).to_i
        end

# Array#sample(n) returns at most index.size distinct elements.
sample = index.sample(count)
sample.sort! unless opts[:shuffle]

sample.each do |idx|
  if !sampling_lines
    puts idx
  elsif opts[:output_index]
    puts "#{idx}\t#{input[idx]}"
  else
    puts input[idx]
  end
end
|