summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-03-02 16:36:13 +0100
committerPatrick Simianer <p@simianer.de>2014-03-02 16:36:13 +0100
commit9d693723ba7bcf380182e8bd4d622f6d8eff4e3a (patch)
treee7c489836a41bdbcc30c9690108ed9e3a77b965d
parent38862b7e0cde7ac7285169c10e1377357ea24488 (diff)
logistic regression
-rwxr-xr-xlin_reg8
-rwxr-xr-xlog_reg73
-rw-r--r--test/lin_reg/exptected.txt3
-rw-r--r--test/lin_reg/input.dat (renamed from test/lin_reg/x.dat)0
-rw-r--r--test/lin_reg/output.dat (renamed from test/lin_reg/y.dat)0
-rw-r--r--test/log_reg/expected.txt2
-rw-r--r--test/log_reg/input.dat80
-rw-r--r--test/log_reg/output.dat80
8 files changed, 242 insertions, 4 deletions
diff --git a/lin_reg b/lin_reg
index 3546c3e..d512104 100755
--- a/lin_reg
+++ b/lin_reg
@@ -44,13 +44,13 @@ def main
i += 1
u = SparseVector.new zeros
overall_loss = 0.0
- data.each_with_index { |d,j|
- loss = model.dot(d) - t[j]
+ data.each_with_index { |x,j|
+ loss = model.dot(x) - t[j]
overall_loss += loss**2
- u += d * (loss * (1.0/t.size))
+ u += x * loss
}
STDERR.write "#{i} #{overall_loss/data.size}\n" if cfg[:show_loss]
- u *= cfg[:learning_rate]
+ u *= cfg[:learning_rate]*(1.0/t.size)
model -= u
if model.approx_eql? prev_model
stop += 1
diff --git a/log_reg b/log_reg
new file mode 100755
index 0000000..c0a95d4
--- /dev/null
+++ b/log_reg
@@ -0,0 +1,73 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'matrix'
+require 'trollop'
+
+
+# Loads the input file named by fn and returns an Array with one Vector per
+# line read.
+#
+# Each line is stripped, split with tokenize, and every token is converted
+# with to_f. A constant 1.0 is placed in front of the converted values
+# (presumably the bias/intercept column -- confirm against the model usage).
+#
+# NOTE(review): ReadFile and tokenize are not defined in this script; they
+# presumably come from nlp_ruby -- confirm how tokenize splits a line.
+# NOTE(review): the reader is never closed here; check whether ReadFile needs
+# an explicit close.
+def read_data fn
+  reader = ReadFile.new fn
+  rows = []
+  while (raw = reader.gets)
+    values = tokenize(raw.strip).map { |tok| tok.to_f }
+    rows << Vector.elements([1.0] + values)
+  end
+  rows
+end
+
+# Returns the dot product of x and y as a Float.
+#
+# x and y are indexable sequences of numbers (Array or Vector) of the same
+# length. The products are accumulated left to right starting from 0.0.
+#
+# Raises ArgumentError when the lengths differ, instead of silently ignoring
+# the tail of a longer y or failing on a nil element of a shorter one.
+def dot x, y
+  if x.size != y.size
+    raise ArgumentError, "dimension mismatch: #{x.size} vs #{y.size}"
+  end
+  r = 0.0
+  x.each_with_index { |xi, j| r += xi * y[j] }
+  r
+end
+
+# Returns true when x and y have the same size and every pair of
+# corresponding elements differs by at most eps in absolute value.
+#
+# Returns false when either argument is nil/false or when the sizes differ.
+# The default tolerance 10**-10 evaluates to the exact Rational 1/10**10 in
+# Ruby; it compares fine against Float differences.
+def approx_eql x, y, eps=10**-10
+  return false if !x || !y
+  return false if x.size != y.size
+  x.each_with_index { |xi, i|
+    # Phrased as "unless <= eps" rather than "if > eps" so that a NaN
+    # difference (every comparison with NaN is false) counts as NOT equal.
+    return false unless (xi - y[i]).abs <= eps
+  }
+  true
+end
+
+# Fits a logistic regression model with Newton's method and prints it.
+#
+# Command line (parsed with Trollop):
+#   --input   file with one example per line (read via read_data)
+#   --output  file with one target value per line, converted with to_f
+#
+# Every iteration computes the averaged gradient and Hessian over all
+# examples and updates the model by hessian.inverse * gradient. The loop
+# stops once two consecutive models are approximately equal (approx_eql).
+# The iteration count goes to STDERR and the final model to STDOUT.
+#
+# NOTE(review): there is no iteration cap, and Matrix#inverse raises if the
+# Hessian is singular -- confirm whether that is acceptable for the inputs.
+def main
+  cfg = Trollop::options do
+    opt :input, "input data", :type => :string, :required => true
+    opt :output, "1/0 output data", :type => :string, :required => true
+  end
+  data = read_data cfg[:input]
+  dim = data[0].size
+  zeros = [0.0]*dim
+  t = ReadFile.readlines(cfg[:output]).map{ |line| line.to_f }
+  model = Vector.elements zeros
+  prev_model = nil
+  i = 0
+  while true
+    i += 1
+    # Gradient and Hessian are rebuilt from zero on every pass; carrying them
+    # over would mix the previous iteration's (already averaged) values into
+    # the current Newton step.
+    gradient = Vector.elements zeros
+    hessian = Matrix.build(dim, dim) { |_r, _c| 0.0 }
+    data.each_with_index { |x,j|
+      # m is the sigmoid of the current linear score for example j.
+      m = 1.0/(1+Math.exp(-dot(model, x)))
+      gradient += (m-t[j]) * x
+      # Outer product x * x^T, weighted by m*(1-m).
+      hup = Matrix.column_vector(x) * Matrix.row_vector(x)
+      hessian += m*(1.0-m) * hup
+    }
+    gradient /= data.size
+    hessian /= data.size
+    model -= hessian.inverse * gradient
+    break if approx_eql model, prev_model
+    prev_model = model
+  end
+  STDERR.write "ran for #{i} iterations\n"
+  puts model.to_s
+end
+
+
+# Script entry point: run the fit when the file is executed.
+main
+
diff --git a/test/lin_reg/exptected.txt b/test/lin_reg/exptected.txt
new file mode 100644
index 0000000..13de1fc
--- /dev/null
+++ b/test/lin_reg/exptected.txt
@@ -0,0 +1,3 @@
+ran for 2527 iterations
+ R^2=0.858063223720823
+{0=>0.7501625304145768, 1=>0.06388116702419537}
diff --git a/test/lin_reg/x.dat b/test/lin_reg/input.dat
index 3d93394..3d93394 100644
--- a/test/lin_reg/x.dat
+++ b/test/lin_reg/input.dat
diff --git a/test/lin_reg/y.dat b/test/lin_reg/output.dat
index 1f4f963..1f4f963 100644
--- a/test/lin_reg/y.dat
+++ b/test/lin_reg/output.dat
diff --git a/test/log_reg/expected.txt b/test/log_reg/expected.txt
new file mode 100644
index 0000000..46a03ef
--- /dev/null
+++ b/test/log_reg/expected.txt
@@ -0,0 +1,2 @@
+ran for 15 iterations
+Vector[-16.378743410287445, 0.1483407737248737, 0.1589084517934473]
diff --git a/test/log_reg/input.dat b/test/log_reg/input.dat
new file mode 100644
index 0000000..eed0ab1
--- /dev/null
+++ b/test/log_reg/input.dat
@@ -0,0 +1,80 @@
+ 5.5500000e+01 6.9500000e+01
+ 4.1000000e+01 8.1500000e+01
+ 5.3500000e+01 8.6000000e+01
+ 4.6000000e+01 8.4000000e+01
+ 4.1000000e+01 7.3500000e+01
+ 5.1500000e+01 6.9000000e+01
+ 5.1000000e+01 6.2500000e+01
+ 4.2000000e+01 7.5000000e+01
+ 5.3500000e+01 8.3000000e+01
+ 5.7500000e+01 7.1000000e+01
+ 4.2500000e+01 7.2500000e+01
+ 4.1000000e+01 8.0000000e+01
+ 4.6000000e+01 8.2000000e+01
+ 4.6000000e+01 6.0500000e+01
+ 4.9500000e+01 7.6000000e+01
+ 4.1000000e+01 7.6000000e+01
+ 4.8500000e+01 7.2500000e+01
+ 5.1500000e+01 8.2500000e+01
+ 4.4500000e+01 7.0500000e+01
+ 4.4000000e+01 6.6000000e+01
+ 3.3000000e+01 7.6500000e+01
+ 3.3500000e+01 7.8500000e+01
+ 3.1500000e+01 7.2000000e+01
+ 3.3000000e+01 8.1500000e+01
+ 4.2000000e+01 5.9500000e+01
+ 3.0000000e+01 6.4000000e+01
+ 6.1000000e+01 4.5000000e+01
+ 4.9000000e+01 7.9000000e+01
+ 2.6500000e+01 6.4500000e+01
+ 3.4000000e+01 7.1500000e+01
+ 4.2000000e+01 8.3500000e+01
+ 2.9500000e+01 7.4500000e+01
+ 3.9500000e+01 7.0000000e+01
+ 5.1500000e+01 6.6000000e+01
+ 4.1500000e+01 7.1500000e+01
+ 4.2500000e+01 7.9500000e+01
+ 3.5000000e+01 5.9500000e+01
+ 3.8500000e+01 7.3500000e+01
+ 3.2000000e+01 8.1500000e+01
+ 4.6000000e+01 6.0500000e+01
+ 3.6500000e+01 5.3000000e+01
+ 3.6500000e+01 5.3500000e+01
+ 2.4000000e+01 6.0500000e+01
+ 1.9000000e+01 5.7500000e+01
+ 3.4500000e+01 6.0000000e+01
+ 3.7500000e+01 6.4500000e+01
+ 3.5500000e+01 5.1000000e+01
+ 3.7000000e+01 5.0500000e+01
+ 2.1500000e+01 4.2000000e+01
+ 3.5500000e+01 5.8500000e+01
+ 2.6500000e+01 6.8500000e+01
+ 2.6500000e+01 5.5500000e+01
+ 1.8500000e+01 6.7000000e+01
+ 4.0000000e+01 6.7000000e+01
+ 3.2500000e+01 7.1500000e+01
+ 3.9000000e+01 7.1500000e+01
+ 4.3000000e+01 5.5500000e+01
+ 2.2000000e+01 5.4000000e+01
+ 3.6000000e+01 6.2500000e+01
+ 3.1000000e+01 5.5500000e+01
+ 3.8500000e+01 7.6000000e+01
+ 4.0000000e+01 7.5000000e+01
+ 3.7500000e+01 6.3000000e+01
+ 2.4500000e+01 5.8000000e+01
+ 3.0000000e+01 6.7000000e+01
+ 3.3000000e+01 5.6000000e+01
+ 5.6500000e+01 6.1000000e+01
+ 4.1000000e+01 5.7000000e+01
+ 4.9500000e+01 6.3000000e+01
+ 3.4500000e+01 7.2500000e+01
+ 3.2500000e+01 6.9000000e+01
+ 3.6000000e+01 7.3000000e+01
+ 2.7000000e+01 5.3500000e+01
+ 4.1000000e+01 6.3500000e+01
+ 2.9500000e+01 5.2500000e+01
+ 2.0000000e+01 6.5500000e+01
+ 3.8000000e+01 6.5000000e+01
+ 1.8500000e+01 7.4500000e+01
+ 1.6000000e+01 7.2500000e+01
+ 3.3500000e+01 6.8000000e+01
diff --git a/test/log_reg/output.dat b/test/log_reg/output.dat
new file mode 100644
index 0000000..51283c0
--- /dev/null
+++ b/test/log_reg/output.dat
@@ -0,0 +1,80 @@
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 1.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00
+ 0.0000000e+00