#!/usr/bin/env ruby

require 'trollop'

# Splits three line-parallel files (input, references, alignments) into
# +num_shards+ shards, keeping line i of all three files in the same shard.
#
# Output files are named "<output_prefix>.<shard>.<ext>", where <ext> is the
# text after the last '.' of the corresponding source path for input and
# references, and the fixed extension "a" for alignments.
#
# Every shard first receives lc / num_shards lines. The lc % num_shards
# remaining lines all go to the last shard when +rand+ is false, and go one
# each to the first shards when +rand+ is true.
#
# NOTE(review): ReadFile and WriteFile are not defined or required in this
# file; presumably they come from a project library that must be loaded
# before this runs -- confirm. This code only relies on ReadFile#readlines,
# WriteFile#write and #close, exactly as before.
#
# @param input [String] path of the input file
# @param refs [String] path of the references file
# @param alignments [String] path of the alignments file
# @param output_prefix [String] prefix for all shard file names
# @param num_shards [Integer] number of shards to create (must be > 0)
# @param rand [Boolean] shuffle the line order before sharding
# @return [Array<Array<String>>] [input shard names, reference shard names]
# @raise [ArgumentError] if num_shards is not positive, or if references or
#   alignments have fewer lines than the input
def make_shards(input, refs, alignments, output_prefix, num_shards=2, rand=false)
  raise ArgumentError, 'num_shards must be positive' unless num_shards > 0

  input_ext = input.split('.').last
  refs_ext = refs.split('.').last

  # Every handle opened below is recorded here so the ensure clause can close
  # all of them, including the alignment files, even when an error occurs.
  readers = []
  writers = []

  in_lines, refs_lines, a_lines = [input, refs, alignments].map do |path|
    reader = ReadFile.new path
    readers << reader
    reader.readlines
  end

  # Count lines from the data already in memory instead of shelling out to
  # `wc -l`, which broke on paths containing spaces or shell metacharacters.
  lc = in_lines.size
  if refs_lines.size < lc || a_lines.size < lc
    raise ArgumentError,
          "references (#{refs_lines.size} lines) and alignments " \
          "(#{a_lines.size} lines) must cover all #{lc} input lines"
  end

  order = (0...lc).to_a
  order.shuffle! if rand
  shard_sz, leftover_sz = lc.divmod(num_shards)

  # Writes line j of each source to the matching writer of one shard. A final
  # line without a trailing newline is terminated so that it cannot fuse with
  # the following line when shuffling moves it into the middle of a shard.
  emit = lambda do |shard_writers, j|
    shard_writers.zip([in_lines[j], refs_lines[j], a_lines[j]]) do |writer, line|
      writer.write(line.end_with?("\n") ? line : "#{line}\n")
    end
  end

  in_fns = []
  refs_fns = []
  shards = []
  num_shards.times do |shard|
    in_fn = "#{output_prefix}.#{shard}.#{input_ext}"
    refs_fn = "#{output_prefix}.#{shard}.#{refs_ext}"
    a_fn = "#{output_prefix}.#{shard}.a"
    in_fns << in_fn
    refs_fns << refs_fn

    shard_writers = [in_fn, refs_fn, a_fn].map do |fn|
      WriteFile.new(fn).tap { |w| writers << w }
    end
    shards << shard_writers

    order.shift(shard_sz).each { |j| emit.call(shard_writers, j) }
  end

  # Exactly leftover_sz (< num_shards) indices remain in order at this point.
  order.each_with_index do |j, k|
    emit.call(rand ? shards[k] : shards.last, j)
  end

  [in_fns, refs_fns]
ensure
  ((writers || []) + (readers || [])).each { |f| f.close }
end

opts = Trollop::options do
  opt :input, 'input', :type => :string, :required => true
  opt :references, 'references', :type => :string, :required => true
  opt :alignments, 'alignments', :type => :string, :required => true
  opt :output_prefix, 'output prefix', :type => :string, :required => true
  opt :randomize, 'randomize', :type => :bool, :default => false, :short => '-z'
  opt :num_shards, 'number of shards', :type => :int, :required => true
end

make_shards(opts[:input], opts[:references], opts[:alignments],
            opts[:output_prefix], opts[:num_shards], opts[:randomize])