summaryrefslogtreecommitdiff
path: root/linreg.rb
blob: 09630a6bfee9b0844deafa11cff4da56060fab65 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env ruby

require 'nlp_ruby'
require 'trollop'


def read_data fn
  f = ReadFile.new fn
  data = []
  while line = f.gets
    line.strip!
    v = SparseVector.new
    a = []
    a << 1.0
    tokenize(line).each { |i| a << i.to_f }
    v.from_a(a)
    data << v
  end
  return data  
end

def main
  cfg = Trollop::options do
    opt :input,         "input data",         :type => :string, :required => true
    opt :output,        "output data",        :type => :string, :required => true
    opt :learning_rate, "learning rate",      :type => :float,  :default => 0.07
    opt :stop,          "stopping criterion", :type => :int,    :default => 100
  end
  data = read_data cfg[:input]
  zeros = [0.0]*data[0].size
  t = ReadFile.new(cfg[:output]).readlines.map{ |i| i.to_f }
  model = SparseVector.new zeros
  stop = 0
  prev_model = nil
  i = 0
  while true
    i += 1
    u = SparseVector.new zeros
    data.each_with_index { |d,j|
      u += d * ((model.dot(d) - t[j])*(1.0/t.size))
    }
    u *= cfg[:learning_rate]
    model -= u
    if model.approx_eql? prev_model 
      stop += 1
    else
      stop = 0
    end
    break if stop==cfg[:stop]
    prev_model = model
  end
  tss = t.map{ |y| (y-t.mean)**2 }.sum
  j = -1
  rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum
  STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n"
  puts model.to_s
end


main