#!/usr/bin/env ruby
#
# Prints a random sample of input lines, or of the indices 0..N-1.
#
# Modes (as selected by the options parsed below):
#   * default     --size is a percentage of the available items.
#   * --absolute  --size is an item count.
#   * --N n       sample from the indices 0..n-1 instead of reading input;
#                 only the chosen indices are printed.
#
# Unless --shuffle is given, the sample is printed in ascending index order
# (i.e. original input order). With --output_index each printed line is
# prefixed by its 0-based index and a tab.

require 'optimist'

STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'

opts = Optimist::options do
  banner "sample --size [--shuffle] --file "
  opt :size, "Sample P % or # lines from file or N.", :type => :float
  opt :shuffle, "Sample is shuffled.", :type => :bool
  opt :file, "Input file.", :type => :string, :default => '-'
  opt :output_index, "Output index number.", :type => :bool
  opt :N, "Sample --size from N items.", :type => :int, :default => -1
  opt :absolute, "Sample absolute number of items.", :type => :bool
end

# --size has no default; without it the arithmetic below would fail on nil.
Optimist::die :size, "is required" unless opts[:size]
# Array#sample raises ArgumentError on a negative count; report it cleanly.
Optimist::die :size, "must be non-negative" if opts[:size] < 0

# -1 is the default for --N and means "sample lines from the input".
from_input = opts[:N] == -1

input = []
if from_input
  input =
    if opts[:file] == '-'
      STDIN.readlines
    else
      # Block form guarantees the file handle is closed after reading.
      File.open(opts[:file], 'r') { |f| f.readlines }
    end
  index = (0...input.size).to_a
else
  index = (0...opts[:N]).to_a
end

# Number of items to draw. The fractional part is truncated, which is what
# Array#sample did implicitly when it was handed a Float.
count =
  if opts[:absolute]
    opts[:size].to_i
  else
    (index.size * (opts[:size] / 100.0)).to_i
  end

sample = index.sample(count)
sample.sort! unless opts[:shuffle]

sample.each do |idx|
  if !from_input
    puts idx
  elsif opts[:output_index]
    puts "#{idx}\t#{input[idx]}"
  else
    puts input[idx]
  end
end