summaryrefslogtreecommitdiff
path: root/lin_reg
diff options
context:
space:
mode:
Diffstat (limited to 'lin_reg')
-rwxr-xr-xlin_reg70
1 files changed, 0 insertions, 70 deletions
diff --git a/lin_reg b/lin_reg
deleted file mode 100755
index 7a8e614..0000000
--- a/lin_reg
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'zipf'
-require 'trollop'
-
-def read_data fn, scale
- f = ReadFile.new fn
- data = []
- while line = f.gets
- line.strip!
- a = []
- a << 1.0
- tokenize(line).each { |i| a << i.to_f }
- v = SparseVector.from_a a
- data << v
- end
- if scale
- data.map { |i| i.keys }.flatten.uniq.each { |k|
- max = data.map { |i| i[k] }.max
- data.each { |i| i[k] /= max }
- }
- end
- return data
-end
-
-def main
- conf = Trollop::options do
- opt :input, "input data", :type => :string, :required => true
- opt :output, "output data", :type => :string, :required => true
- opt :learning_rate, "learning rate", :type => :float, :default => 0.07
- opt :stop, "stopping criterion", :type => :int, :default => 100
- opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t'
- opt :show_loss, "show loss per iter", :type => :bool, :default => false
- end
- data = read_data conf[:input], conf[:scale_features]
- zeros = [0.0]*data[0].size
- t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f }
- model = SparseVector.new zeros
- stop = 0
- prev_model = nil
- i = 0
- while true
- i += 1
- u = SparseVector.new zeros
- overall_loss = 0.0
- data.each_with_index { |x,j|
- loss = model.dot(x) - t[j]
- overall_loss += loss**2
- u += x * loss
- }
- STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss]
- u *= conf[:learning_rate]*(1.0/t.size)
- model -= u
- if model.approx_eql? prev_model
- stop += 1
- else
- stop = 0
- end
- break if stop==conf[:stop]
- prev_model = model
- end
- tss = t.map{ |y| (y-t.mean)**2 }.sum
- j = -1
- rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum
- STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n"
- puts model.to_s
-end
-
-main
-