summaryrefslogtreecommitdiff
path: root/perceptron-new.rb
blob: 521e6f5839f8aa6b46ab96285e432306c9a0a43a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env ruby

require 'zipf'

# Load the training set from the file named by the first command-line
# argument. Every line is stripped and handed to SparseVector.from_kv with
# '=' and ' ' as separators (presumably "key=value" pairs separated by
# spaces -- confirm against the zipf gem), and the resulting vectors are
# collected in `train`.
#
# Fail early with a usage message instead of crashing inside ReadFile.new
# when no input file was given.
abort "usage: #{$0} <training data file>" if ARGV.empty?

STDERR.write "reading training data...\n"
train = []
train_f = ReadFile.new ARGV[0]
begin
  n = 0
  while i = train_f.gets
    train << SparseVector.from_kv(i.strip, '=', ' ')
    n += 1
    # progress: print the running count every 1000 examples
    STDERR.write "#{n}\n" if n%1000==0
  end
ensure
  # Release the input handle even if parsing a line raises; the handle was
  # previously left open for the rest of the (potentially long) training run.
  train_f.close
end
STDERR.write " training set size = #{train.size}\n"

# Training state for the perceptron loop below.
T = 1_000_000          # max number of iterations
w = SparseVector.new   # weight vector, starts out empty
prev_loss = Float::MAX # loss of the previous iteration, used for the convergence check
t = 0                  # number of completed iterations
no_change = 0          # consecutive iterations whose loss stayed (almost) the same

# Repeatedly sweep over the shuffled training data. Whenever the dot product
# of the weights with an example is not positive, the example is added to the
# weights and the absolute value of that dot product is added to the loss.
# Stops after T iterations, or once the loss has changed by at most 10**-4
# for three iterations in a row.
loop do
  if t == T
    STDERR.write "\nreached max. number of iterations!\n"
    break
  end

  STDERR.write "\niteration #{t}\n"

  train.shuffle!
  loss = 0.0

  train.each.with_index(1) do |example, seen|
    score = w.dot(example)
    unless score > 0.0
      loss += score.abs
      w += example
    end
    # progress output: a dot every 10 examples, a line break every 1000
    STDERR.write '.'  if (seen % 10).zero?
    STDERR.write "\n" if (seen % 1000).zero?
  end

  STDERR.write "loss = #{loss}\n"
  t += 1

  # Count how many iterations in a row the loss has been stable; any larger
  # change resets the counter.
  loss_delta = (loss.abs - prev_loss.abs).abs
  no_change = loss_delta <= 10**-4 ? no_change + 1 : 0

  if no_change == 3
    STDERR.write "\nno change in loss since three iterations (difference < 10**-4)!\n"
    break
  end

  prev_loss = loss
end

# Serialize the learned weight vector with '=' and ' ' as separators and
# store it as a single line in 'model.gz' (presumably gzip-compressed by
# WriteFile because of the .gz suffix -- confirm against the zipf gem).
STDERR.write "\nwriting model...\n"
model_out = WriteFile.new('model.gz')
serialized_weights = w.to_kv('=', ' ')
model_out.write(serialized_weights + "\n")
model_out.close
STDERR.write "done!\n"