diff options
author | Patrick Simianer <p@simianer.de> | 2014-03-02 16:36:13 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-03-02 16:36:13 +0100 |
commit | 9d693723ba7bcf380182e8bd4d622f6d8eff4e3a (patch) | |
tree | e7c489836a41bdbcc30c9690108ed9e3a77b965d | |
parent | 38862b7e0cde7ac7285169c10e1377357ea24488 (diff) |
logistic regression
-rwxr-xr-x | lin_reg | 8 | ||||
-rwxr-xr-x | log_reg | 73 | ||||
-rw-r--r-- | test/lin_reg/exptected.txt | 3 | ||||
-rw-r--r-- | test/lin_reg/input.dat (renamed from test/lin_reg/x.dat) | 0 | ||||
-rw-r--r-- | test/lin_reg/output.dat (renamed from test/lin_reg/y.dat) | 0 | ||||
-rw-r--r-- | test/log_reg/expected.txt | 2 | ||||
-rw-r--r-- | test/log_reg/input.dat | 80 | ||||
-rw-r--r-- | test/log_reg/output.dat | 80 |
8 files changed, 242 insertions, 4 deletions
@@ -44,13 +44,13 @@ def main i += 1 u = SparseVector.new zeros overall_loss = 0.0 - data.each_with_index { |d,j| - loss = model.dot(d) - t[j] + data.each_with_index { |x,j| + loss = model.dot(x) - t[j] overall_loss += loss**2 - u += d * (loss * (1.0/t.size)) + u += x * loss } STDERR.write "#{i} #{overall_loss/data.size}\n" if cfg[:show_loss] - u *= cfg[:learning_rate] + u *= cfg[:learning_rate]*(1.0/t.size) model -= u if model.approx_eql? prev_model stop += 1 @@ -0,0 +1,73 @@ +#!/usr/bin/env ruby + +require 'nlp_ruby' +require 'matrix' +require 'trollop' + + +def read_data fn + f = ReadFile.new fn + data = [] + while line = f.gets + line.strip! + a = [] + a << 1.0 + tokenize(line).each { |i| a << i.to_f } + v = Vector.elements a + data << v + end + return data +end + +def dot x, y + r = 0.0 + x.each_with_index { |_,j| + r += x[j] * y[j] + } + return r +end + +def approx_eql x, y, eps=10**-10 + return false if !x||!y + return false if x.size!=y.size + x.each_with_index { |_,i| + return false if (x[i]-y[i]).abs>eps + } + return true +end + +def main + cfg = Trollop::options do + opt :input, "input data", :type => :string, :required => true + opt :output, "1/0 output data", :type => :string, :required => true + end + data = read_data cfg[:input] + dim = data[0].size + zeros = [0.0]*dim + t = ReadFile.readlines(cfg[:output]).map{ |i| i.to_f } + model = Vector.elements zeros + prev_model = nil + gradient = Vector.elements zeros + hessian = Matrix.build(dim,dim) { |i,j| 0.0 } + i = 0 + while true + i += 1 + data.each_with_index { |x,j| + m = 1.0/(1+Math.exp(-dot(model, x))) + gradient += (m-t[j]) * x + hup = Matrix.column_vector(x) * Matrix.row_vector(x) + hessian += m*(1.0-m) * hup + } + gradient /= data.size + hessian /= data.size + model -= hessian.inverse * gradient + break if approx_eql model, prev_model + prev_model = model + end + STDERR.write "ran for #{i} iterations\n" + puts model.to_s +end + + +main + diff --git a/test/lin_reg/exptected.txt b/test/lin_reg/exptected.txt new file mode 100644 index 0000000..13de1fc --- /dev/null +++ b/test/lin_reg/exptected.txt @@ -0,0 +1,3 @@ +ran for 2527 iterations + R^2=0.858063223720823 +{0=>0.7501625304145768, 1=>0.06388116702419537} diff --git a/test/lin_reg/x.dat b/test/lin_reg/input.dat index 3d93394..3d93394 100644 --- a/test/lin_reg/x.dat +++ b/test/lin_reg/input.dat diff --git a/test/lin_reg/y.dat b/test/lin_reg/output.dat index 1f4f963..1f4f963 100644 --- a/test/lin_reg/y.dat +++ b/test/lin_reg/output.dat diff --git a/test/log_reg/expected.txt b/test/log_reg/expected.txt new file mode 100644 index 0000000..46a03ef --- /dev/null +++ b/test/log_reg/expected.txt @@ -0,0 +1,2 @@ +ran for 15 iterations +Vector[-16.378743410287445, 0.1483407737248737, 0.1589084517934473] diff --git a/test/log_reg/input.dat b/test/log_reg/input.dat new file mode 100644 index 0000000..eed0ab1 --- /dev/null +++ b/test/log_reg/input.dat @@ -0,0 +1,80 @@ + 5.5500000e+01 6.9500000e+01 + 4.1000000e+01 8.1500000e+01 + 5.3500000e+01 8.6000000e+01 + 4.6000000e+01 8.4000000e+01 + 4.1000000e+01 7.3500000e+01 + 5.1500000e+01 6.9000000e+01 + 5.1000000e+01 6.2500000e+01 + 4.2000000e+01 7.5000000e+01 + 5.3500000e+01 8.3000000e+01 + 5.7500000e+01 7.1000000e+01 + 4.2500000e+01 7.2500000e+01 + 4.1000000e+01 8.0000000e+01 + 4.6000000e+01 8.2000000e+01 + 4.6000000e+01 6.0500000e+01 + 4.9500000e+01 7.6000000e+01 + 4.1000000e+01 7.6000000e+01 + 4.8500000e+01 7.2500000e+01 + 5.1500000e+01 8.2500000e+01 + 4.4500000e+01 7.0500000e+01 + 4.4000000e+01 6.6000000e+01 + 3.3000000e+01 7.6500000e+01 + 3.3500000e+01 7.8500000e+01 + 3.1500000e+01 7.2000000e+01 + 3.3000000e+01 8.1500000e+01 + 4.2000000e+01 5.9500000e+01 + 3.0000000e+01 6.4000000e+01 + 6.1000000e+01 4.5000000e+01 + 4.9000000e+01 7.9000000e+01 + 2.6500000e+01 6.4500000e+01 + 3.4000000e+01 7.1500000e+01 + 4.2000000e+01 8.3500000e+01 + 2.9500000e+01 7.4500000e+01 + 3.9500000e+01 7.0000000e+01 + 5.1500000e+01 6.6000000e+01 + 4.1500000e+01 7.1500000e+01 + 4.2500000e+01 7.9500000e+01 + 3.5000000e+01 5.9500000e+01 + 3.8500000e+01 7.3500000e+01 + 3.2000000e+01 8.1500000e+01 + 4.6000000e+01 6.0500000e+01 + 3.6500000e+01 5.3000000e+01 + 3.6500000e+01 5.3500000e+01 + 2.4000000e+01 6.0500000e+01 + 1.9000000e+01 5.7500000e+01 + 3.4500000e+01 6.0000000e+01 + 3.7500000e+01 6.4500000e+01 + 3.5500000e+01 5.1000000e+01 + 3.7000000e+01 5.0500000e+01 + 2.1500000e+01 4.2000000e+01 + 3.5500000e+01 5.8500000e+01 + 2.6500000e+01 6.8500000e+01 + 2.6500000e+01 5.5500000e+01 + 1.8500000e+01 6.7000000e+01 + 4.0000000e+01 6.7000000e+01 + 3.2500000e+01 7.1500000e+01 + 3.9000000e+01 7.1500000e+01 + 4.3000000e+01 5.5500000e+01 + 2.2000000e+01 5.4000000e+01 + 3.6000000e+01 6.2500000e+01 + 3.1000000e+01 5.5500000e+01 + 3.8500000e+01 7.6000000e+01 + 4.0000000e+01 7.5000000e+01 + 3.7500000e+01 6.3000000e+01 + 2.4500000e+01 5.8000000e+01 + 3.0000000e+01 6.7000000e+01 + 3.3000000e+01 5.6000000e+01 + 5.6500000e+01 6.1000000e+01 + 4.1000000e+01 5.7000000e+01 + 4.9500000e+01 6.3000000e+01 + 3.4500000e+01 7.2500000e+01 + 3.2500000e+01 6.9000000e+01 + 3.6000000e+01 7.3000000e+01 + 2.7000000e+01 5.3500000e+01 + 4.1000000e+01 6.3500000e+01 + 2.9500000e+01 5.2500000e+01 + 2.0000000e+01 6.5500000e+01 + 3.8000000e+01 6.5000000e+01 + 1.8500000e+01 7.4500000e+01 + 1.6000000e+01 7.2500000e+01 + 3.3500000e+01 6.8000000e+01 diff --git a/test/log_reg/output.dat b/test/log_reg/output.dat new file mode 100644 index 0000000..51283c0 --- /dev/null +++ b/test/log_reg/output.dat @@ -0,0 +1,80 @@ + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 1.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 + 0.0000000e+00 |