summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2016-04-08 23:19:28 +0200
committer Patrick Simianer <p@simianer.de> 2016-04-08 23:19:28 +0200
commit 34d621606e58adeb35aa33f8503e105189a7c0d5 (patch)
tree 18931029e5ceed1e2c2d77bfdf2b87a216633366
parent 81f6b20d1c41d8906900c57ab71ec08a007ea02c (diff)
latest stuff (HEAD, master)
-rwxr-xr-x fast_test.rb 26
-rwxr-xr-x rerank.rb 2
-rwxr-xr-x rerank_output_lists.rb 48
-rwxr-xr-x test_n_learn.rb 32
-rwxr-xr-x voted_rerank.rb 67
5 files changed, 174 insertions, 1 deletions
diff --git a/fast_test.rb b/fast_test.rb
new file mode 100755
index 0000000..23f7edf
--- /dev/null
+++ b/fast_test.rb
@@ -0,0 +1,26 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+STDERR.write "testing...\n"
+test = []
+test_f = ReadFile.new ARGV[0]
+n = 0
+errors = 0
+w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n"
+while i = test_f.gets
+ x = SparseVector.from_kv(i.strip, '=', ' ')
+ m = w.dot(x)
+ if m <= 0.0
+ errors += 1
+ puts -1
+ else
+ puts 1
+ end
+ n += 1
+ STDERR.write "#{n}\n" if n%1000==0
+end
+STDERR.write " test set size = #{n}\n"
+
+STDERR.write "accuracy = #{(n-errors)/n.to_f}\n"
+
diff --git a/rerank.rb b/rerank.rb
index 095e20b..9f7233c 100755
--- a/rerank.rb
+++ b/rerank.rb
@@ -18,7 +18,7 @@ class KbestItem
end
end
-w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n"
+w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, /\s/, "\n"
def o kl
scores = []
diff --git a/rerank_output_lists.rb b/rerank_output_lists.rb
new file mode 100755
index 0000000..22bd9ca
--- /dev/null
+++ b/rerank_output_lists.rb
@@ -0,0 +1,48 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+class KbestItem
+ attr_accessor :rank, :model, :rr, :gold, :f
+ def initialize s
+ a = s.split "\t"
+ @rank = a[0].to_i
+ @gold = a[1].to_f
+ @model = a[2].to_f
+ @rr = -1.0
+ @f = SparseVector.from_kv a[3], "=", " "
+ end
+
+ def to_s
+ return "#{@model}\t#{@gold}\t#{@rank}\t#{@rr}"
+ end
+end
+
+w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n" # presumably the model weights; dotted with each item's features below
+
+def o kl, j
+ f = WriteFile.new "lists/#{j}.model"
+ g = WriteFile.new "lists/#{j}.rr"
+ kl.each { |i|
+ f.write "#{i.gold}\t#{i.model}\n"
+ g.write "#{i.gold}\t#{i.rr}\n"
+ }
+end
+
+`mkdir -p lists`
+STDERR.write "reranking..\n"
+cur = []
+k_sum = 0
+j = 0
+while line = STDIN.gets
+ item = KbestItem.new line.strip
+ item.rr = w.dot(item.f)
+ if item.rank == 0 && cur.size > 0
+ o cur, j
+ cur = []
+ j += 1
+ end
+ cur << item
+end
+o cur, j
+
diff --git a/test_n_learn.rb b/test_n_learn.rb
new file mode 100755
index 0000000..03c0cd4
--- /dev/null
+++ b/test_n_learn.rb
@@ -0,0 +1,32 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+# Buffers the test examples from ARGV[0], then scores each one against the weights read from ARGV[1].
+STDERR.write "reading test data...\n"
+test = []
+test_f = ReadFile.new ARGV[0]
+threshold = ARGV[2].to_f
+signals = ReadFile.new(ARGV[1]).readlines_strip.map{|i| (i.to_f)>=threshold} # NOTE(review): never read below, and ARGV[1] is parsed again as the weights further down -- confirm intent
+n = 0
+while i = test_f.gets
+ test << SparseVector.from_kv(i.strip, '=', ' ')
+ n += 1
+ STDERR.write "#{n}\n" if n%1000==0
+end
+STDERR.write " test set size = #{test.size}\n"
+
+errors = 0
+w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n"
+# One prediction per example on STDOUT: -1 (counted as an error) when the score is <= 0, otherwise 1.
+test.each { |x|
+ m = w.dot(x)
+ if m <= 0.0
+ errors += 1
+ puts -1
+ else
+ puts 1
+ end
+}
+
+STDERR.write "accuracy = #{(test.size-errors)/test.size.to_f}\n"
+
diff --git a/voted_rerank.rb b/voted_rerank.rb
new file mode 100755
index 0000000..684825d
--- /dev/null
+++ b/voted_rerank.rb
@@ -0,0 +1,67 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
+class KbestItem
+ attr_accessor :rank, :model, :rr, :gold, :f
+ def initialize s
+ a = s.split "\t"
+ @rank = a[0].to_i
+ @gold = a[1].to_f
+ @model = a[2].to_f
+ @rr = -1.0
+ @f = SparseVector.from_kv a[3], "=", " "
+ end
+
+ def to_s
+ return "#{@model}\t#{@gold}"
+ end
+end
+
+ws = []
+cs = []
+ReadFile.readlines_strip(ARGV[0]).each { |l|
+ c, s = l.split "\t"
+ cs << c.to_i
+ next if !s||s.strip==""
+ ws << SparseVector.from_kv(s, "=", " ")
+}
+
+def sign(x)
+ if x <= 0
+ return -1.0
+ else
+ return 1.0
+ end
+end
+
+def o kl
+ scores = []
+ scores << kl.first.gold
+ kl.sort! { |i,j| j.model <=> i.model }
+ scores << kl.first.gold
+ kl.sort! { |i,j| j.rr <=> i.rr }
+ scores << kl.first.gold
+
+ puts scores.join "\t"
+end
+
+STDERR.write "reranking..\n"
+cur = []
+k_sum = 0
+j = 0
+while line = STDIN.gets
+ item = KbestItem.new line.strip
+ item.rr = 0
+ ws.each_with_index{ |w,j|
+ item.rr += sign(w.dot(x))*cs[j]
+ }
+ if item.rank == 0 && cur.size > 0
+ o cur
+ cur = []
+ j += 1
+ end
+ cur << item
+end
+o cur
+