summaryrefslogtreecommitdiff
path: root/log-reg
blob: 5e435554baa8a0a3045aca7355a0c771ff9c17f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env ruby

require 'zipf'
require 'matrix'
require 'optimist'

# Loads the feature rows stored in +fn+.
#
# Each line obtained from the ReadFile reader is stripped and handed to
# tokenize; every token is converted with to_f and the resulting numbers are
# placed after a constant leading 1.0 entry (presumably the intercept term).
#
# Returns an Array containing one Vector per line read.
def read_data(fn)
  reader = ReadFile.new(fn)
  rows = []
  while (line = reader.gets)
    line.strip!
    features = tokenize(line).map(&:to_f)
    rows << Vector[1.0, *features]
  end
  rows
end

# Inner product of two indexable sequences.
#
# Walks over every position of +x+ and adds x[j] * y[j] to a running total
# that starts at 0.0, so the result is a Float even for integer inputs.
# +y+ is only read at the positions that exist in +x+.
def dot(x, y)
  x.each_with_index.inject(0.0) do |total, (_, j)|
    total + x[j] * y[j]
  end
end

# Element-wise approximate equality of two indexable sequences.
#
# Returns false when either argument is nil/false or when the sizes differ.
# Otherwise returns true only if no pair of corresponding elements differs
# by more than +eps+ in absolute value.
def approx_eql(x, y, eps = 10**-10)
  return false unless x && y
  return false unless x.size == y.size

  (0...x.size).none? { |i| (x[i] - y[i]).abs > eps }
end

# Fits a logistic-regression model with Newton's method and prints it.
#
# Command-line options (parsed with Optimist):
#   --input   file with the feature rows, loaded through read_data
#   --output  file with one target value per line, converted with to_f
#
# Every iteration accumulates the gradient and the Hessian over all rows at
# the current model, averages both over the number of rows and applies
#   model -= hessian.inverse * gradient
# The loop stops once two consecutive models agree according to approx_eql.
# The iteration count is written to STDERR and the final model to STDOUT.
#
# Aborts with a message when the input has no rows or when there are fewer
# target values than rows.
def main
  conf = Optimist::options do
    opt :input,         "input data",         :type => :string, :required => true
    opt :output,        "1/0 output data",    :type => :string, :required => true
  end
  data = read_data conf[:input]
  abort "no input rows found in #{conf[:input]}" if data.empty?
  dim = data[0].size
  t = ReadFile.readlines(conf[:output]).map { |line| line.to_f }
  if t.size < data.size
    abort "expected #{data.size} outputs but found only #{t.size} in #{conf[:output]}"
  end
  model = Vector.elements([0.0] * dim)
  prev_model = nil
  iterations = 0
  loop do
    iterations += 1
    # Rebuild the accumulators for every step: the Newton update has to use
    # the derivatives at the current model only, without any residue carried
    # over from earlier iterations.
    gradient = Vector.elements([0.0] * dim)
    hessian = Matrix.build(dim, dim) { 0.0 }
    data.each_with_index do |x, j|
      # Sigmoid of the linear score: the model's predicted value for row j.
      m = 1.0 / (1 + Math.exp(-dot(model, x)))
      gradient += (m - t[j]) * x
      # Outer product x * x^T, weighted by m * (1 - m).
      outer = Matrix.column_vector(x) * Matrix.row_vector(x)
      hessian += m * (1.0 - m) * outer
    end
    gradient /= data.size
    hessian /= data.size
    model -= hessian.inverse * gradient
    # prev_model is nil on the first pass, which approx_eql reports as
    # "not equal", so at least two iterations are always performed.
    break if approx_eql model, prev_model
    prev_model = model
  end
  STDERR.write "ran for #{iterations} iterations\n"
  puts model.to_s
end

# Script entry point: run the trainer as soon as the file is executed.
main