diff options
Diffstat (limited to 'training')
| -rwxr-xr-x | training/dtrain/parallelize.rb | 8 | 
1 files changed, 5 insertions, 3 deletions
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index a1826e98..fca9b10d 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -5,16 +5,16 @@ require 'trollop'  def usage    if ARGV.size != 8      STDERR.write "Usage: " -    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary>\n" +    STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n"      exit 1    end  end -usage if not [11, 12, 13, 14].include? ARGV.size  opts = Trollop::options do    opt :config, "dtrain config file", :type => :string    opt :epochs, "number of epochs", :type => :int    opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false +  opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false    opt :shards, "number of shards", :type => :int    opt :processes_at_once, "have this number (max) running at the same time", :type => :int, :default => 9999    opt :input, "input", :type => :string @@ -40,6 +40,8 @@ ini        = opts[:config]  epochs     = opts[:epochs]  rand = false  rand = true if opts[:randomize] +reshard = false +reshard = true if opts[:reshard]  predefined_shards = false  if opts[:shards] == 0    predefined_shards = true @@ -142,7 +144,7 @@ end    end    `#{cat} work/weights.*.#{epoch} > work/weights_cat`    `#{ruby} #{lplp_rb} #{lplp_args} #{num_shards} < work/weights_cat > work/weights.#{epoch}` -  if rand and epoch+1!=epochs +  if rand and reshard and epoch+1!=epochs      input_files, refs_files = make_shards input, refs, num_shards, epoch+1, rand    end  }  | 
