summaryrefslogtreecommitdiff
path: root/voted_perceptron.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-08-04 15:57:31 +0200
committer Patrick Simianer <p@simianer.de> 2015-08-04 15:57:31 +0200
commit 81f6b20d1c41d8906900c57ab71ec08a007ea02c (patch)
tree b05250a71f8ec2417f0fb26fa2b6e79ea41deca6 /voted_perceptron.rb
parent 5f983dd1edeff0dd04c701381c7f0d3f2a83a525 (diff)
voted perceptron
Diffstat (limited to 'voted_perceptron.rb')
-rwxr-xr-x voted_perceptron.rb 93
1 files changed, 93 insertions, 0 deletions
diff --git a/voted_perceptron.rb b/voted_perceptron.rb
new file mode 100755
index 0000000..13211b1
--- /dev/null
+++ b/voted_perceptron.rb
@@ -0,0 +1,93 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+# Load the training set: every input line becomes one sparse vector.
+STDERR.write "reading training data...\n"
+train_f = ReadFile.new ARGV[0]
+train = []
+while line = train_f.gets
+  # the stripped line is parsed by from_kv with '=' and ' ' as separators
+  train << SparseVector.from_kv(line.strip, '=', ' ')
+  STDERR.write "#{train.size}\n" if (train.size % 1000).zero? # progress
+end
+STDERR.write " training set size = #{train.size}\n"
+
+prev_loss = Float::MAX # loss of the previous iteration (convergence check)
+T = 1000000 # max number of iterations
+t = 0 # current iteration
+w = SparseVector.new # current weight vector, starts as the 0 vector
+c = 0 # examples scored > 0 by w since its last update
+ws = [] # weight vectors that were replaced by an update
+cs = [] # cs[k] is the value c had when ws[k] was replaced
+no_change = 0 # consecutive iterations with (almost) unchanged loss
+# Snapshot interval (2nd argument); missing, zero or non-numeric input
+# falls back to 1, since t%0 would raise ZeroDivisionError.
+save_freq = ARGV[1].to_i
+save_freq = 1 if save_freq.zero?
+
+# Writes the voted model to fn, one "count<TAB>vector" line per vector.
+# The live w (with count c) is not part of ws/cs until an update retires
+# it, so it is appended here; otherwise the newest weight vector, e.g.
+# the one a converged run ends with, would never reach the model file.
+def write_model fn, ws, cs, w, c
+  STDERR.write "\nwriting model to #{fn} ...\n"
+  f = WriteFile.new fn
+  (ws+[w]).zip(cs+[c]) { |v,k| f.write "#{k}\t#{v.to_kv("=", " ")+"\n"}" }
+  f.close
+  STDERR.write "done!\n"
+end
+
+# Training: at most T shuffled passes over train, each followed by a
+# convergence check and, every save_freq iterations, a model snapshot.
+while true
+
+  if t == T
+    STDERR.write "\nreached max. number of iterations!\n"
+    break
+  end
+
+  STDERR.write "\niteration #{t}\n"
+
+  train.shuffle!
+  loss = 0.0
+  errors = 0
+
+  train.each_with_index { |x,i|
+    m = w.dot(x)
+    if m <= 0.0 # x is not scored positive: count an error and update w
+      loss += m.abs
+      errors += 1
+      # retire w with its count (presumably .new copies w -- TODO confirm)
+      ws << SparseVector.new(w)
+      cs << c
+      w += x
+      c = 0
+    else
+      c += 1
+    end
+    STDERR.write '.' if (i+1)%10==0
+    STDERR.write "\n" if (i+1)%1000==0
+  }
+
+  STDERR.write "errors = #{errors} (avg = #{(errors/train.size.to_f).round 2}), loss = #{loss.round 2} (avg = #{(loss/train.size).round 2}); #w:#{ws.size}, max c:#{cs.max} \n"
+
+  # stop after three iterations in a row whose loss moved by <= 10**-4
+  if (loss.abs-prev_loss.abs).abs <= 10**-4
+    no_change += 1
+  else
+    no_change = 0
+  end
+  if no_change == 3
+    STDERR.write "\nno change in loss since three iterations (difference < 10**-4)!\n"
+    break
+  end
+  prev_loss = loss
+
+  write_model "model.#{t}.gz", ws, cs, w, c if t%save_freq == 0
+
+  t += 1
+end
+
+write_model "model.final.gz", ws, cs, w, c
+