summary refs log tree commit diff
path: root/algorithms/log_reg_sgd.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit 26c490f404731d053a6205719b6246502c07b449 (patch)
tree 3aa721098f1251dfbf2249ecd2736434c13b1d48 /algorithms/log_reg_sgd.rb
init
Diffstat (limited to 'algorithms/log_reg_sgd.rb')
-rwxr-xr-x algorithms/log_reg_sgd.rb 71
1 files changed, 71 insertions, 0 deletions
diff --git a/algorithms/log_reg_sgd.rb b/algorithms/log_reg_sgd.rb
new file mode 100755
index 0000000..16a48e2
--- /dev/null
+++ b/algorithms/log_reg_sgd.rb
@@ -0,0 +1,71 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'trollop'
+
+
+def read_data fn, scale
+ f = ReadFile.new fn
+ data = []
+ while line = f.gets
+ line.strip!
+ a = []
+ a << 1.0
+ tokenize(line).each { |i| a << i.to_f }
+ v = SparseVector.from_a a
+ data << v
+ end
+ if scale
+ data.map { |i| i.keys }.flatten.uniq.each { |k|
+ max = data.map { |i| i[k] }.max
+ data.each { |i| i[k] /= max }
+ }
+ end
+ return data
+end
+
+def main
+ cfg = Trollop::options do
+ opt :input, "input data", :type => :string, :required => true
+ opt :output, "1/0 output data", :type => :string, :required => true
+ opt :learning_rate, "learning rate", :type => :float, :default => 0.07
+ opt :stop, "stopping criterion", :type => :int, :default => 100
+ opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t'
+ opt :show_loss, "show loss per iter", :type => :bool, :default => false
+ end
+ data = read_data cfg[:input], cfg[:scale_features]
+ zeros = [0.0]*data[0].size
+ t = ReadFile.readlines(cfg[:output]).map{ |i| i.to_f }
+ model = SparseVector.new zeros
+ stop = 0
+ prev_model = nil
+ i = 0
+ while true
+ i += 1
+ u = SparseVector.new zeros
+ overall_loss = 0.0
+ data.each_with_index { |x,j|
+ m = 1.0/(1+Math.exp(-model.dot(x)))
+ loss = m - t[j]
+ overall_loss += loss
+ u += x * loss
+ }
+ STDERR.write "#{i} #{overall_loss/data.size}\n" if cfg[:show_loss]
+ u *= cfg[:learning_rate]*(1.0/t.size)
+ model -= u
+ if model.approx_eql? prev_model
+ stop += 1
+ else
+ stop = 0
+ end
+ break if stop==cfg[:stop]
+ prev_model = model
+ puts model.to_s
+ end
+ STDERR.write "ran for #{i} iterations\n"
+ puts model.to_s
+end
+
+
+main
+