summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x linreg.rb 61
-rw-r--r-- test/linreg/x.dat 50
-rw-r--r-- test/linreg/y.dat 50
3 files changed, 161 insertions, 0 deletions
diff --git a/linreg.rb b/linreg.rb
new file mode 100755
index 0000000..09630a6
--- /dev/null
+++ b/linreg.rb
@@ -0,0 +1,61 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'trollop'
+
+
+def read_data fn
+ f = ReadFile.new fn
+ data = []
+ while line = f.gets
+ line.strip!
+ v = SparseVector.new
+ a = []
+ a << 1.0
+ tokenize(line).each { |i| a << i.to_f }
+ v.from_a(a)
+ data << v
+ end
+ return data
+end
+
+def main
+ cfg = Trollop::options do
+ opt :input, "input data", :type => :string, :required => true
+ opt :output, "output data", :type => :string, :required => true
+ opt :learning_rate, "learning rate", :type => :float, :default => 0.07
+ opt :stop, "stopping criterion", :type => :int, :default => 100
+ end
+ data = read_data cfg[:input]
+ zeros = [0.0]*data[0].size
+ t = ReadFile.new(cfg[:output]).readlines.map{ |i| i.to_f }
+ model = SparseVector.new zeros
+ stop = 0
+ prev_model = nil
+ i = 0
+ while true
+ i += 1
+ u = SparseVector.new zeros
+ data.each_with_index { |d,j|
+ u += d * ((model.dot(d) - t[j])*(1.0/t.size))
+ }
+ u *= cfg[:learning_rate]
+ model -= u
+ if model.approx_eql? prev_model
+ stop += 1
+ else
+ stop = 0
+ end
+ break if stop==cfg[:stop]
+ prev_model = model
+ end
+ tss = t.map{ |y| (y-t.mean)**2 }.sum
+ j = -1
+ rss = t.map{ |y| j+=1; (y-model.dot(data[j]))**2 }.sum
+ STDERR.write "ran for #{i} iterations\n R^2=#{1-(rss/tss)}\n"
+ puts model.to_s
+end
+
+
+main # script entry point: runs the regression when the file is executed
+
diff --git a/test/linreg/x.dat b/test/linreg/x.dat
new file mode 100644
index 0000000..3d93394
--- /dev/null
+++ b/test/linreg/x.dat
@@ -0,0 +1,50 @@
+ 2.0658746e+00
+ 2.3684087e+00
+ 2.5399929e+00
+ 2.5420804e+00
+ 2.5490790e+00
+ 2.7866882e+00
+ 2.9116825e+00
+ 3.0356270e+00
+ 3.1146696e+00
+ 3.1582389e+00
+ 3.3275944e+00
+ 3.3793165e+00
+ 3.4122006e+00
+ 3.4215823e+00
+ 3.5315732e+00
+ 3.6393002e+00
+ 3.6732537e+00
+ 3.9256462e+00
+ 4.0498646e+00
+ 4.2483348e+00
+ 4.3440052e+00
+ 4.3826531e+00
+ 4.4230602e+00
+ 4.6102443e+00
+ 4.6881183e+00
+ 4.9777333e+00
+ 5.0359967e+00
+ 5.0684536e+00
+ 5.4161491e+00
+ 5.4395623e+00
+ 5.4563207e+00
+ 5.5698458e+00
+ 5.6015729e+00
+ 5.6877617e+00
+ 5.7215602e+00
+ 5.8538914e+00
+ 6.1978026e+00
+ 6.3510941e+00
+ 6.4797033e+00
+ 6.7383791e+00
+ 6.8637686e+00
+ 7.0223387e+00
+ 7.0782373e+00
+ 7.1514232e+00
+ 7.4664023e+00
+ 7.5973874e+00
+ 7.7440717e+00
+ 7.7729662e+00
+ 7.8264514e+00
+ 7.9306356e+00
diff --git a/test/linreg/y.dat b/test/linreg/y.dat
new file mode 100644
index 0000000..1f4f963
--- /dev/null
+++ b/test/linreg/y.dat
@@ -0,0 +1,50 @@
+ 7.7918926e-01
+ 9.1596757e-01
+ 9.0538354e-01
+ 9.0566138e-01
+ 9.3898890e-01
+ 9.6684740e-01
+ 9.6436824e-01
+ 9.1445939e-01
+ 9.3933944e-01
+ 9.6074971e-01
+ 8.9837094e-01
+ 9.1209739e-01
+ 9.4238499e-01
+ 9.6624578e-01
+ 1.0526500e+00
+ 1.0143791e+00
+ 9.5969426e-01
+ 9.6853716e-01
+ 1.0766065e+00
+ 1.1454978e+00
+ 1.0340625e+00
+ 1.0070009e+00
+ 9.6683648e-01
+ 1.0895919e+00
+ 1.0634462e+00
+ 1.1237239e+00
+ 1.0323374e+00
+ 1.0874452e+00
+ 1.0702988e+00
+ 1.1606493e+00
+ 1.0778037e+00
+ 1.1069758e+00
+ 1.0971875e+00
+ 1.1648603e+00
+ 1.1411796e+00
+ 1.0844156e+00
+ 1.1252493e+00
+ 1.1168341e+00
+ 1.1970789e+00
+ 1.2069462e+00
+ 1.1251046e+00
+ 1.1235672e+00
+ 1.2132829e+00
+ 1.2522652e+00
+ 1.2497065e+00
+ 1.1799706e+00
+ 1.1897299e+00
+ 1.3029934e+00
+ 1.2601134e+00
+ 1.2562267e+00