diff options
Diffstat (limited to 'lin_reg')
-rwxr-xr-x | lin_reg | 70 |
1 files changed, 0 insertions, 70 deletions
#!/usr/bin/env ruby
# lin_reg -- linear regression fitted with batch gradient descent.
# (Source recovered from the deleted-file diff of `lin_reg`, blob 7a8e614.)
#
# Reads feature rows from --input and target values from --output, fits a
# weight vector, reports the iteration count and R^2 on STDERR and prints the
# fitted model on STDOUT.
#
# ReadFile, SparseVector, tokenize, Array#mean and Array#sum are used as
# provided by the `zipf` gem; their exact semantics are not visible here.

require 'zipf'
require 'trollop'

# Reads one feature vector per line of +fn+.
#
# Every line is tokenized and each token converted with +to_f+; a constant
# 1.0 is prepended to each row so the first weight acts as the intercept.
#
# fn    - path handed to ReadFile.new.
# scale - when truthy, each feature column is divided by its maximum value.
#
# Returns an Array of SparseVector.
def read_data(fn, scale)
  f = ReadFile.new fn
  data = []
  while line = f.gets
    line.strip!
    row = [1.0]
    tokenize(line).each { |tok| row << tok.to_f }
    data << SparseVector.from_a(row)
  end
  scale_columns data if scale
  data
end

# Divides every feature column of +data+ by that column's maximum, in place.
def scale_columns(data)
  data.flat_map(&:keys).uniq.each do |k|
    max = data.map { |v| v[k] }.max
    # Dividing by a zero maximum would turn the column into NaN/Infinity,
    # so such a column is left as it is.
    next if max.zero?
    data.each { |v| v[k] /= max }
  end
end

# Runs batch gradient descent on the squared error until the model has been
# approximately unchanged for conf[:stop] consecutive iterations.
#
# data - Array of SparseVector feature rows (must be non-empty).
# t    - Array of Float targets, same length as +data+.
# conf - option Hash; reads :learning_rate, :stop and :show_loss.
#
# Returns [model, iterations].
def train(data, t, conf)
  zeros = [0.0] * data[0].size
  model = SparseVector.new zeros
  prev_model = nil
  unchanged = 0
  iterations = 0
  loop do
    iterations += 1
    update = SparseVector.new zeros
    overall_loss = 0.0
    data.each_with_index do |x, j|
      loss = model.dot(x) - t[j]
      overall_loss += loss**2
      update += x * loss
    end
    STDERR.write "#{iterations} #{overall_loss/data.size}\n" if conf[:show_loss]
    update *= conf[:learning_rate] * (1.0 / t.size)
    model -= update
    # NOTE(review): prev_model is nil on the first pass; this relies on
    # approx_eql? (from zipf) treating nil as "not equal" -- confirm.
    unchanged = model.approx_eql?(prev_model) ? unchanged + 1 : 0
    break if unchanged == conf[:stop]
    prev_model = model
  end
  [model, iterations]
end

# Coefficient of determination of +model+ on the training data:
# 1 - RSS/TSS. The target mean is computed once rather than per element.
def r_squared(model, data, t)
  mean = t.mean
  tss = t.map { |y| (y - mean)**2 }.sum
  rss = t.each_with_index.map { |y, j| (y - model.dot(data[j]))**2 }.sum
  1 - (rss / tss)
end

# Parses the command line, loads the data, trains the model and reports the
# result. Exits with an error message when the input is empty or when the
# number of feature rows and target values differ.
def main
  conf = Trollop::options do
    opt :input, "input data", :type => :string, :required => true
    opt :output, "output data", :type => :string, :required => true
    opt :learning_rate, "learning rate", :type => :float, :default => 0.07
    opt :stop, "stopping criterion", :type => :int, :default => 100
    opt :scale_features, "scale features", :type => :bool, :default => false, :short => '-t'
    opt :show_loss, "show loss per iter", :type => :bool, :default => false
  end
  data = read_data conf[:input], conf[:scale_features]
  abort "lin_reg: no feature rows in '#{conf[:input]}'" if data.empty?
  t = ReadFile.readlines(conf[:output]).map { |i| i.to_f }
  unless data.size == t.size
    abort "lin_reg: #{data.size} feature rows but #{t.size} target values"
  end
  model, iterations = train data, t, conf
  STDERR.write "ran for #{iterations} iterations\n R^2=#{r_squared(model, data, t)}\n"
  puts model.to_s
end

main