diff options
author | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-07-05 11:01:46 +0200 |
commit | 2b1d7f881c19c4d4b5afae194e02d3300c7675d0 (patch) | |
tree | 5a06ee7de98640a39244b57bb369697176b44ebf /lin-reg | |
parent | 69949dda35c3ea21d8e926e5f0a596a0a0f61c6a (diff) |
mv
Diffstat (limited to 'lin-reg')
-rwxr-xr-x | lin-reg | 70 |
1 files changed, 70 insertions, 0 deletions
@@ -0,0 +1,70 @@ +#!/usr/bin/env ruby + +require 'zipf' +require 'trollop' + +def read_data fn, scale + f = ReadFile.new fn + data = [] + while line = f.gets + line.strip! + a = [] + a << 1.0 + tokenize(line).each { |i| a << i.to_f } + v = SparseVector.from_a a + data << v + end + if scale + data.map { |i| i.keys }.flatten.uniq.each { |k| + max = data.map { |i| i[k] }.max + data.each { |i| i[k] /= max } + } + end + return data +end + +def main + conf = Trollop::options do + opt :input, "input data", :type => :string, :required => true + opt :output, "output data", :type => :string, :required => true + opt :learning_rate, "learning rate", :type => :float, :default => 0.07 + opt :stop, "stopping criterion", :type => :int, :default => 100 + opt :scale_features,"scale features", :type => :bool, :default => false, :short => '-t' + opt :show_loss, "show loss per iter", :type => :bool, :default => false + end + data = read_data conf[:input], conf[:scale_features] + zeros = [0.0]*data[0].size + t = ReadFile.readlines(conf[:output]).map{ |i| i.to_f } + model = SparseVector.new zeros + stop = 0 + prev_model = nil + i = 0 + while true + i += 1 + u = SparseVector.new zeros + overall_loss = 0.0 + data.each_with_index { |x,j| + loss = model.dot(x) - t[j] + overall_loss += loss**2 + u += x * loss + } + STDERR.write "#{i} #{overall_loss/data.size}\n" if conf[:show_loss] + u *= conf[:learning_rate]*(1.0/t.size) + model -= u + if model.approx_eql? prev_model + stop += 1 + else + stop = 0 + end + break if stop==conf[:stop] + prev_model = model + end + tss = t.map{ |y| (y-t.mean)**2 }.sum + j = -1 + rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum + STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n" + puts model.to_s +end + +main + |