diff options
| author | Patrick Simianer <p@simianer.de> | 2014-09-21 14:02:24 +0100 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2014-09-21 14:02:24 +0100 | 
| commit | 45761f29e0136a08869bef4d0db5296eeedd6e63 (patch) | |
| tree | ea97c646d42983f4311d6b7f523c23e9acbf66b4 | |
| parent | a3fee2abd19ed0aedebc0f3949fb969f2ce7836e (diff) | |
sample
| -rwxr-xr-x | sample | 53 | 
1 file changed, 47 insertions, 6 deletions
```diff
@@ -5,14 +5,55 @@ require 'trollop'
 STDIN.set_encoding 'utf-8'
 STDOUT.set_encoding 'utf-8'
-
 opts = Trollop::options do
-  banner "sample --size <n> < <line separated data>"
-  opt :size, "Sample n% (percentage).", :type => :int
+  banner "sample --size <n> [--shuffle] --file <line separated data>"
+  opt :size, "Sample P % or # lines from file or N.", :type => :float
+  opt :shuffle, "Sample is shuffled.", :type => :bool
+  opt :file, "Input file.", :type => :string, :default => '-'
+  opt :output_index, "Output index number.", :type => :bool
+  opt :N, "Sample --size from N items.", :type => :int, :default => -1
+  opt :absolute, "Sample absolute number of items.", :type => :bool
+end
+
+input = []
+index = []
+i = 0
+if opts[:N] == -1
+  if opts[:file] == '-'
+    file = STDIN
+  else
+    file = File.new opts[:file], 'r'
+  end
+  while line = file.gets
+    input << line
+    index << i
+    i += 1
+  end
+else
+  index = (0..opts[:N]-1).to_a
+end
+
+sample = []
+if !opts[:absolute]
+  sample = index.sample(index.size*(opts[:size]/100.0))
+  sample = index.sample(index.size*(opts[:size]/100.0))
+else
+  sample = index.sample(opts[:size])
+end
+
+if !opts[:shuffle]
+  sample.sort!
 end
-prng = Random.new(Random.new_seed)
-while line = STDIN.gets
-  STDOUT.write line if prng.rand(1..opts[:size])==0
+while idx = sample.shift
+  if opts[:N] != -1
+    puts idx
+  else
+    if opts[:output_index]
+      puts "#{idx} #{input[idx]}"
+    else
+      puts "#{input[idx]}"
+    end
+  end
 end
```
