From 2e983112813c41b40800aee1ce9d0a083763f224 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 25 Mar 2015 09:32:22 +0100 Subject: rewrote perceptron --- perceptron-new-test.rb | 30 ++++++++++++++++++++++++++++++ perceptron-new.rb | 24 +++++++++++++++++++----- 2 files changed, 49 insertions(+), 5 deletions(-) create mode 100755 perceptron-new-test.rb diff --git a/perceptron-new-test.rb b/perceptron-new-test.rb new file mode 100755 index 0000000..6566f68 --- /dev/null +++ b/perceptron-new-test.rb @@ -0,0 +1,30 @@ +#!/usr/bin/env ruby + +require 'zipf' + +STDERR.write "reading test data...\n" +test = [] +test_f = ReadFile.new ARGV[0] +n = 0 +while i = test_f.gets + test << SparseVector.from_kv(i.strip, '=', ' ') + n += 1 + STDERR.write "#{n}\n" if n%1000==0 +end +STDERR.write " test set size = #{test.size}\n" + +errors = 0 +w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n" + +test.each { |x| + m = w.dot(x) + if m <= 0.0 + errors += 1 + puts -1 + else + puts 1 + end +} + +STDERR.write "accuracy = #{(test.size-errors)/test.size.to_f}\n" + diff --git a/perceptron-new.rb b/perceptron-new.rb index 521e6f5..1c8a76c 100755 --- a/perceptron-new.rb +++ b/perceptron-new.rb @@ -18,6 +18,10 @@ T = 1000000 # max number of iterations t = 0 w = SparseVector.new # 0 vector no_change = 0 +save_freq = 1 +if ARGV[1] + save_freq = ARGV[1].to_i +end while true @@ -30,12 +34,14 @@ while true train.shuffle! loss = 0.0 + errors = 0 j = 1 train.each { |x| m = w.dot(x) if m <= 0.0 loss += m.abs + errors += 1 w += x end STDERR.write '.' if j%10==0 @@ -43,8 +49,7 @@ while true j += 1 } - STDERR.write "loss = #{loss}\n" - t += 1 + STDERR.write "errors = #{errors} (avg = #{(errors/train.size.to_f).round 2}), loss = #{loss.round 2} (avg = #{(loss/train.size).round 2})\n" if (loss.abs-prev_loss.abs).abs <= 10**-4 no_change += 1 @@ -57,11 +62,20 @@ while true end prev_loss = loss + if t%save_freq == 0 + STDERR.write "\nwriting model to model.#{t}.gz ...\n" + f = WriteFile.new "model.#{t}.gz" + f.write w.to_kv("\t", "\n")+"\n" + f.close + STDERR.write "done!\n" + end + + t += 1 end -STDERR.write "\nwriting model...\n" -f = WriteFile.new 'model.gz' -f.write w.to_kv('=', ' ')+"\n" +STDERR.write "\nwriting model to model.final.gz ...\n" +f = WriteFile.new "model.final.gz" +f.write w.to_kv("\t", "\n")+"\n" f.close STDERR.write "done!\n" -- cgit v1.2.3