summary | refs | log | tree | commit | diff
path: root/training/dtrain/parallelize.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2013-03-15 11:31:18 +0100
committer Patrick Simianer <p@simianer.de> 2013-03-15 11:31:18 +0100
commit 2a48d73eb794fdd736d1df035c8a31af887cde0a (patch)
tree bd9121f660b7dba21a194ae685e93189b9545488 /training/dtrain/parallelize.rb
parent 529c8f0671ce0b09c2a797278a8f84242c86465d (diff)
overhauled ruby scripts and examples
Diffstat (limited to 'training/dtrain/parallelize.rb')
-rwxr-xr-x training/dtrain/parallelize.rb 26
1 files changed, 11 insertions, 15 deletions
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 24e7f49e..e661416e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -3,16 +3,15 @@
require 'trollop'
def usage
- if ARGV.size != 8
- STDERR.write "Usage: "
- STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n"
- exit 1
- end
+ STDERR.write "Usage: "
+ STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n"
+ exit 1
end
opts = Trollop::options do
opt :config, "dtrain config file", :type => :string
- opt :epochs, "number of epochs", :type => :int
+ opt :epochs, "number of epochs", :type => :int, :default => 10
+ opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000"
opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false
opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false
opt :shards, "number of shards", :type => :int
@@ -21,8 +20,8 @@ opts = Trollop::options do
opt :references, "references", :type => :string
opt :qsub, "use qsub", :type => :bool, :default => false
opt :dtrain_binary, "path to dtrain binary", :type => :string
- opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000"
end
+usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
dtrain_dir = File.expand_path File.dirname(__FILE__)
@@ -32,16 +31,14 @@ else
dtrain_bin = opts[:dtrain_binary]
end
ruby = '/usr/bin/ruby'
-lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb"
+lplp_rb = "#{dtrain_dir}/lplp.rb"
lplp_args = opts[:lplp_args]
cat = '/bin/cat'
ini = opts[:config]
epochs = opts[:epochs]
-rand = false
-rand = true if opts[:randomize]
-reshard = false
-reshard = true if opts[:reshard]
+rand = opts[:randomize]
+reshard = opts[:reshard]
predefined_shards = false
if opts[:shards] == 0
predefined_shards = true
@@ -49,11 +46,10 @@ if opts[:shards] == 0
else
num_shards = opts[:shards]
end
-shards_at_once = opts[:processes_at_once]
input = opts[:input]
refs = opts[:references]
-use_qsub = false
-use_qsub = true if opts[:qsub]
+use_qsub = opts[:qsub]
+shards_at_once = opts[:processes_at_once]
`mkdir work`