summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-03-25 09:32:22 +0100
committer Patrick Simianer <p@simianer.de> 2015-03-25 09:32:22 +0100
commit 2e983112813c41b40800aee1ce9d0a083763f224 (patch)
tree f40e535e739a9d60a94336835d6c810f22ea12b4
parent 6ae893b1a83e1f38d2c72ff025fd2a1300919dbc (diff)
rewrote perceptron
-rwxr-xr-x perceptron-new-test.rb 30
-rwxr-xr-x perceptron-new.rb 24
2 files changed, 49 insertions, 5 deletions
diff --git a/perceptron-new-test.rb b/perceptron-new-test.rb
new file mode 100755
index 0000000..6566f68
--- /dev/null
+++ b/perceptron-new-test.rb
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+STDERR.write "reading test data...\n"
+test = []
+test_f = ReadFile.new ARGV[0]
+n = 0
+while i = test_f.gets
+ test << SparseVector.from_kv(i.strip, '=', ' ')
+ n += 1
+ STDERR.write "#{n}\n" if n%1000==0
+end
+STDERR.write " test set size = #{test.size}\n"
+
+errors = 0
+w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n"
+
+test.each { |x|
+ m = w.dot(x)
+ if m <= 0.0
+ errors += 1
+ puts -1
+ else
+ puts 1
+ end
+}
+
+STDERR.write "accuracy = #{(test.size-errors)/test.size.to_f}\n"
+
diff --git a/perceptron-new.rb b/perceptron-new.rb
index 521e6f5..1c8a76c 100755
--- a/perceptron-new.rb
+++ b/perceptron-new.rb
@@ -18,6 +18,10 @@ T = 1000000 # max number of iterations
t = 0
w = SparseVector.new # 0 vector
no_change = 0
+save_freq = 1
+if ARGV[1]
+ save_freq = ARGV[1].to_i
+end
while true
@@ -30,12 +34,14 @@ while true
train.shuffle!
loss = 0.0
+ errors = 0
j = 1
train.each { |x|
m = w.dot(x)
if m <= 0.0
loss += m.abs
+ errors += 1
w += x
end
STDERR.write '.' if j%10==0
@@ -43,8 +49,7 @@ while true
j += 1
}
- STDERR.write "loss = #{loss}\n"
- t += 1
+ STDERR.write "errors = #{errors} (avg = #{(errors/train.size.to_f).round 2}), loss = #{loss.round 2} (avg = #{(loss/train.size).round 2})\n"
if (loss.abs-prev_loss.abs).abs <= 10**-4
no_change += 1
@@ -57,11 +62,20 @@ while true
end
prev_loss = loss
+ if t%save_freq == 0
+ STDERR.write "\nwriting model to model.#{t}.gz ...\n"
+ f = WriteFile.new "model.#{t}.gz"
+ f.write w.to_kv("\t", "\n")+"\n"
+ f.close
+ STDERR.write "done!\n"
+ end
+
+ t += 1
end
-STDERR.write "\nwriting model...\n"
-f = WriteFile.new 'model.gz'
-f.write w.to_kv('=', ' ')+"\n"
+STDERR.write "\nwriting model to model.final.gz ...\n"
+f = WriteFile.new "model.final.gz"
+f.write w.to_kv("\t", "\n")+"\n"
f.close
STDERR.write "done!\n"