diff options
author | Patrick Simianer <p@simianer.de> | 2014-09-21 14:02:24 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-09-21 14:02:24 +0100 |
commit | 45761f29e0136a08869bef4d0db5296eeedd6e63 (patch) | |
tree | ea97c646d42983f4311d6b7f523c23e9acbf66b4 /sample | |
parent | a3fee2abd19ed0aedebc0f3949fb969f2ce7836e (diff) |
sample
Diffstat (limited to 'sample')
-rwxr-xr-x | sample | 53 |
1 files changed, 47 insertions, 6 deletions
@@ -5,14 +5,55 @@ require 'trollop'
 STDIN.set_encoding 'utf-8'
 STDOUT.set_encoding 'utf-8'
-
 opts = Trollop::options do
-  banner "sample --size <n> < <line separated data>"
-  opt :size, "Sample n% (percentage).", :type => :int
+  banner "sample --size <n> [--shuffle] --file <line separated data>"
+  opt :size, "Sample P % or # lines from file or N.", :type => :float
+  opt :shuffle, "Sample is shuffled.", :type => :bool
+  opt :file, "Input file.", :type => :string, :default => '-'
+  opt :output_index, "Output index number.", :type => :bool
+  opt :N, "Sample --size from N items.", :type => :int, :default => -1
+  opt :absolute, "Sample absolute number of items.", :type => :bool
+end
+
+input = []
+index = []
+i = 0
+if opts[:N] == -1
+  if opts[:file] == '-'
+    file = STDIN
+  else
+    file = File.new opts[:file], 'r'
+  end
+  while line = file.gets
+    input << line
+    index << i
+    i += 1
+  end
+else
+  index = (0..opts[:N]-1).to_a
+end
+
+sample = []
+if !opts[:absolute]
+  sample = index.sample(index.size*(opts[:size]/100.0))
+  sample = index.sample(index.size*(opts[:size]/100.0))
+else
+  sample = index.sample(opts[:size])
+end
+
+if !opts[:shuffle]
+  sample.sort!
 end
 
-prng = Random.new(Random.new_seed)
-while line = STDIN.gets
-  STDOUT.write line if prng.rand(1..opts[:size])==0
+while idx = sample.shift
+  if opts[:N] != -1
+    puts idx
+  else
+    if opts[:output_index]
+      puts "#{idx} #{input[idx]}"
+    else
+      puts "#{input[idx]}"
+    end
+  end
 end