#!/usr/bin/env ruby
# frozen_string_literal: true

# perceptron-new.rb
#
# Provenance: added by commit 6ae893b1a83e1f38d2c72ff025fd2a1300919dbc
# ("init"), Patrick Simianer, Wed, 18 Mar 2015 12:09:28 +0100
# (new file, mode 100755).
#
# Trains a weight vector with perceptron-style updates and writes it to
# 'model.gz'.
#
# Usage: perceptron-new.rb <training-file>
#
# Every line of the training file is parsed into one SparseVector with
# SparseVector.from_kv(line, '=', ' ').
# NOTE(review): presumably each line holds space-separated `feature=value`
# pairs, and each vector is already oriented so that a positive score
# w.dot(x) means "correct" (there is no separate label) -- confirm against
# the data that is fed to this script.
#
# All progress and diagnostic output goes to STDERR.

require 'zipf'

MAX_ITERATIONS = 1_000_000 # max number of passes over the training data
LOSS_TOLERANCE = 1e-4      # losses closer than this count as "unchanged"
MAX_NO_CHANGE  = 3         # stop after this many consecutive unchanged losses
MODEL_FILE     = 'model.gz'

# Reads the training file at +path+ and returns an Array with one
# SparseVector per input line. A running line count is printed every 1000
# lines. The input file is closed even if parsing raises.
def read_training_data(path)
  STDERR.write "reading training data...\n"
  vectors = []
  input = ReadFile.new path
  begin
    while (line = input.gets)
      vectors << SparseVector.from_kv(line.strip, '=', ' ')
      STDERR.write "#{vectors.size}\n" if (vectors.size % 1000).zero?
    end
  ensure
    input.close
  end
  STDERR.write " training set size = #{vectors.size}\n"
  vectors
end

# Runs one pass over +examples+ starting from the weights +w+.
#
# For every example x with score m = w.dot(x) <= 0 the loss grows by |m|
# and x is added to the weights. Prints a '.' every 10 examples and a
# newline every 1000.
#
# Returns [updated_weights, loss]; the loss is a sum of absolute values and
# therefore never negative.
def run_epoch(w, examples)
  loss = 0.0
  examples.each.with_index(1) do |x, seen|
    score = w.dot(x)
    if score <= 0.0
      loss += score.abs
      w += x # rebinds the local to a new vector (w = w + x)
    end
    STDERR.write '.' if (seen % 10).zero?
    STDERR.write "\n" if (seen % 1000).zero?
  end
  [w, loss]
end

# Fail early with a usage hint instead of an obscure error from inside
# ReadFile when no training file was given.
if ARGV.empty?
  STDERR.write "usage: #{File.basename($PROGRAM_NAME)} <training-file>\n"
  exit 1
end

train = read_training_data ARGV[0]

w = SparseVector.new   # 0 vector
prev_loss = Float::MAX # loss of the previous iteration; MAX = "none yet"
no_change = 0          # consecutive iterations with (almost) unchanged loss
t = 0                  # number of completed iterations

loop do
  if t == MAX_ITERATIONS
    STDERR.write "\nreached max. number of iterations!\n"
    break
  end

  STDERR.write "\niteration #{t}\n"

  train.shuffle! # visit the examples in a fresh random order each pass
  w, loss = run_epoch(w, train)

  STDERR.write "loss = #{loss}\n"
  t += 1

  # Both losses are non-negative, so their plain difference is enough.
  if (loss - prev_loss).abs <= LOSS_TOLERANCE
    no_change += 1
  else
    no_change = 0
  end
  if no_change == MAX_NO_CHANGE
    STDERR.write "\nno change in loss since three iterations (difference < 10**-4)!\n"
    break
  end
  prev_loss = loss
end

STDERR.write "\nwriting model...\n"
f = WriteFile.new MODEL_FILE
begin
  f.write w.to_kv('=', ' ') + "\n"
ensure
  f.close # close the model file even if serialising or writing fails
end
STDERR.write "done!\n"