blob: 521e6f5839f8aa6b46ab96285e432306c9a0a43a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
#!/usr/bin/env ruby
require 'zipf'
# Perceptron-style training script.
#
# Usage: <script> <training file>
#
# Reads one example per line from the file given as the first command-line
# argument, parses each line into a SparseVector, and repeatedly sweeps over
# the (shuffled) examples: whenever the dot product of the weight vector with
# an example is <= 0, the example is added to the weight vector. Training
# stops after T iterations, or once the per-iteration loss has changed by at
# most `tolerance` for `patience` consecutive iterations. The final weights
# are written to 'model.gz'. Progress is reported on STDERR.
#
# NOTE(review): there is no label handling here, so the examples are
# presumably already oriented so that a positive dot product means "correct"
# (e.g. label-multiplied or difference vectors) -- confirm against the data
# producer.

abort "usage: #{$PROGRAM_NAME} <training file>" if ARGV.empty?

STDERR.write "reading training data...\n"
train = []
train_f = ReadFile.new ARGV[0]
begin
  while (line = train_f.gets)
    # Each line holds '='-joined pairs separated by single spaces.
    train << SparseVector.from_kv(line.strip, '=', ' ')
    STDERR.write "#{train.size}\n" if (train.size % 1000).zero?
  end
ensure
  # Release the input handle even if parsing a line fails.
  train_f.close
end
STDERR.write " training set size = #{train.size}\n"

T = 1000000 # max number of iterations
tolerance = 1e-4 # largest loss difference that still counts as "no change"
patience = 3 # consecutive "no change" iterations required to stop

prev_loss = Float::MAX # loss of the previous iteration, for the convergence check
t = 0
w = SparseVector.new # 0 vector
no_change = 0

loop do
  if t == T
    STDERR.write "\nreached max. number of iterations!\n"
    break
  end

  STDERR.write "\niteration #{t}\n"
  train.shuffle!
  loss = 0.0
  train.each.with_index(1) do |x, j|
    m = w.dot(x)
    if m <= 0.0
      # Misclassified (or on the boundary): accumulate loss and update weights.
      loss += m.abs
      w += x
    end
    # Progress output: a dot every 10 examples, a line break every 1000.
    STDERR.write '.' if (j % 10).zero?
    STDERR.write "\n" if (j % 1000).zero?
  end
  STDERR.write "loss = #{loss}\n"
  t += 1

  if (loss.abs - prev_loss.abs).abs <= tolerance
    no_change += 1
  else
    no_change = 0
  end
  if no_change == patience
    STDERR.write "\nno change in loss since three iterations (difference < 10**-4)!\n"
    break
  end
  prev_loss = loss
end

STDERR.write "\nwriting model...\n"
f = WriteFile.new 'model.gz'
begin
  f.write w.to_kv('=', ' ') + "\n"
ensure
  # Always close so the output file is finalized, even if serialization fails.
  f.close
end
STDERR.write "done!\n"
|