summary refs log tree commit diff
path: root/sample
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-09-21 14:02:24 +0100
committer Patrick Simianer <p@simianer.de> 2014-09-21 14:02:24 +0100
commit 45761f29e0136a08869bef4d0db5296eeedd6e63 (patch)
tree ea97c646d42983f4311d6b7f523c23e9acbf66b4 /sample
parent a3fee2abd19ed0aedebc0f3949fb969f2ce7836e (diff)
sample
Diffstat (limited to 'sample')
-rwxr-xr-x sample 53
1 files changed, 47 insertions, 6 deletions
diff --git a/sample b/sample
index e693d5c..8dbd26d 100755
--- a/sample
+++ b/sample
@@ -5,14 +5,55 @@ require 'trollop'
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
-
opts = Trollop::options do
- banner "sample --size <n> < <line separated data>"
- opt :size, "Sample n% (percentage).", :type => :int
+ banner "sample --size <n> [--shuffle] --file <line separated data>"
+ opt :size, "Sample P % or # lines from file or N.", :type => :float, :required => true
+ opt :shuffle, "Sample is shuffled.", :type => :bool
+ opt :file, "Input file.", :type => :string, :default => '-'
+ opt :output_index, "Output index number.", :type => :bool
+ opt :N, "Sample --size from N items.", :type => :int, :default => -1
+ opt :absolute, "Sample absolute number of items.", :type => :bool
+end
+
+input = []
+index = []
+i = 0
+if opts[:N] == -1 # no --N given: the population is the lines of --file (or STDIN)
+ if opts[:file] == '-'
+ file = STDIN
+ else
+ file = File.new opts[:file], 'r'
+ end
+ while line = file.gets
+ input << line
+ index << i
+ i += 1
+ end
+ file.close unless file == STDIN
+else
+ index = (0..opts[:N]-1).to_a
+end
+
+sample = []
+if !opts[:absolute]
+ sample = index.sample(index.size*(opts[:size]/100.0)) # --size is a percentage of the population
+else
+ sample = index.sample(opts[:size]) # --size is an absolute item count
+end
+
+if !opts[:shuffle]
+ sample.sort!
end
-prng = Random.new(Random.new_seed)
-while line = STDIN.gets
- STDOUT.write line if prng.rand(1..opts[:size])==0
+while idx = sample.shift
+ if opts[:N] != -1
+ puts idx
+ else
+ if opts[:output_index]
+ puts "#{idx} #{input[idx]}"
+ else
+ puts "#{input[idx]}"
+ end
+ end
end