From 34d621606e58adeb35aa33f8503e105189a7c0d5 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 8 Apr 2016 23:19:28 +0200
Subject: latest stuff

---
 fast_test.rb           | 29 ++++++++++++++++++
 rerank.rb              |  2 +-
 rerank_output_lists.rb | 58 ++++++++++++++++++++++++++++++++++++
 test_n_learn.rb        | 38 +++++++++++++++++++++++
 voted_rerank.rb        | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 207 insertions(+), 1 deletion(-)
 create mode 100755 fast_test.rb
 create mode 100755 rerank_output_lists.rb
 create mode 100755 test_n_learn.rb
 create mode 100755 voted_rerank.rb

diff --git a/fast_test.rb b/fast_test.rb
new file mode 100755
index 0000000..23f7edf
--- /dev/null
+++ b/fast_test.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+
+# Streams test examples from ARGV[0] (one feature vector per line), scores
+# each against the weight vector in ARGV[1] and prints 1 or -1 per example.
+# An example whose dot product with the weights is <= 0 counts as an error.
+
+require 'zipf'
+
+STDERR.write "testing...\n"
+test_f = ReadFile.new ARGV[0]
+n = 0
+errors = 0
+w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n"
+while i = test_f.gets
+  x = SparseVector.from_kv(i.strip, '=', ' ')
+  m = w.dot(x)
+  if m <= 0.0
+    errors += 1
+    puts -1
+  else
+    puts 1
+  end
+  n += 1
+  STDERR.write "#{n}\n" if n%1000==0
+end
+STDERR.write " test set size = #{n}\n"
+
+STDERR.write "accuracy = #{(n-errors)/n.to_f}\n"
+
diff --git a/rerank.rb b/rerank.rb
index 095e20b..9f7233c 100755
--- a/rerank.rb
+++ b/rerank.rb
@@ -18,7 +18,7 @@ class KbestItem
   end
 end
 
-w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n"
+w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, /\s/, "\n"
 
 def o kl
   scores = []
diff --git a/rerank_output_lists.rb b/rerank_output_lists.rb
new file mode 100755
index 0000000..22bd9ca
--- /dev/null
+++ b/rerank_output_lists.rb
@@ -0,0 +1,58 @@
+#!/usr/bin/env ruby
+
+# Rescores k-best lists read from STDIN with the weight vector in ARGV[0]
+# and writes, for each list j, lists/j.model and lists/j.rr with one
+# "gold<TAB>score" line per item.
+
+require 'zipf'
+
+# One k-best entry parsed from a tab-separated line:
+# rank, gold score, model score, features.
+class KbestItem
+  attr_accessor :rank, :model, :rr, :gold, :f
+  def initialize s
+    a = s.split "\t"
+    @rank = a[0].to_i
+    @gold = a[1].to_f
+    @model = a[2].to_f
+    @rr = -1.0 # overwritten below with the reranker score
+    @f = SparseVector.from_kv a[3], "=", " "
+  end
+
+  def to_s
+    return "#{@model}\t#{@gold}\t#{@rank}\t#{@rr}"
+  end
+end
+
+w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n"
+
+# Writes list number j to lists/j.model and lists/j.rr.
+def o kl, j
+  f = WriteFile.new "lists/#{j}.model"
+  g = WriteFile.new "lists/#{j}.rr"
+  kl.each { |i|
+    f.write "#{i.gold}\t#{i.model}\n"
+    g.write "#{i.gold}\t#{i.rr}\n"
+  }
+  # Close both files so a long input does not run out of file descriptors.
+  f.close
+  g.close
+end
+
+`mkdir -p lists`
+STDERR.write "reranking..\n"
+cur = []
+j = 0
+while line = STDIN.gets
+  item = KbestItem.new line.strip
+  item.rr = w.dot(item.f)
+  # Rank 0 marks the first item of the next list, so flush the current one.
+  if item.rank == 0 && cur.size > 0
+    o cur, j
+    cur = []
+    j += 1
+  end
+  cur << item
+end
+o cur, j
+
diff --git a/test_n_learn.rb b/test_n_learn.rb
new file mode 100755
index 0000000..03c0cd4
--- /dev/null
+++ b/test_n_learn.rb
@@ -0,0 +1,38 @@
+#!/usr/bin/env ruby
+
+# Loads all test examples from ARGV[0] into memory, scores each against the
+# weight vector in ARGV[1] and prints 1 or -1 per example; an example with a
+# dot product <= 0 counts as an error.
+
+require 'zipf'
+
+STDERR.write "reading test data...\n"
+test = []
+test_f = ReadFile.new ARGV[0]
+threshold = ARGV[2].to_f
+# NOTE(review): signals is never used below and is read from ARGV[1], the
+# same file the weights are loaded from -- confirm the intended input.
+signals = ReadFile.new(ARGV[1]).readlines_strip.map{|i| (i.to_f)>=threshold}
+n = 0
+while i = test_f.gets
+  test << SparseVector.from_kv(i.strip, '=', ' ')
+  n += 1
+  STDERR.write "#{n}\n" if n%1000==0
+end
+STDERR.write " test set size = #{test.size}\n"
+
+errors = 0
+w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n"
+
+test.each { |x|
+  m = w.dot(x)
+  if m <= 0.0
+    errors += 1
+    puts -1
+  else
+    puts 1
+  end
+}
+
+STDERR.write "accuracy = #{(test.size-errors)/test.size.to_f}\n"
+
diff --git a/voted_rerank.rb b/voted_rerank.rb
new file mode 100755
index 0000000..684825d
--- /dev/null
+++ b/voted_rerank.rb
@@ -0,0 +1,81 @@
+#!/usr/bin/env ruby
+
+# Reranks k-best lists read from STDIN by a weighted vote over the weight
+# vectors in ARGV[0] (one "count<TAB>vector" line each) and prints, per
+# list, three tab-separated gold scores: first item in input order, top item
+# by model score, top item by voted score.
+
+require 'zipf'
+
+# One k-best entry parsed from a tab-separated line:
+# rank, gold score, model score, features.
+class KbestItem
+  attr_accessor :rank, :model, :rr, :gold, :f
+  def initialize s
+    a = s.split "\t"
+    @rank = a[0].to_i
+    @gold = a[1].to_f
+    @model = a[2].to_f
+    @rr = -1.0
+    @f = SparseVector.from_kv a[3], "=", " "
+  end
+
+  def to_s
+    return "#{@model}\t#{@gold}"
+  end
+end
+
+ws = []
+cs = []
+ReadFile.readlines_strip(ARGV[0]).each { |l|
+  c, s = l.split "\t"
+  # Skip lines without a vector before recording the count, so that
+  # cs[k] always belongs to ws[k].
+  next if !s||s.strip==""
+  cs << c.to_i
+  ws << SparseVector.from_kv(s, "=", " ")
+}
+
+# Returns -1.0 for x <= 0 and 1.0 otherwise.
+def sign(x)
+  if x <= 0
+    return -1.0
+  else
+    return 1.0
+  end
+end
+
+# Prints the gold score of the first item of kl in its given order, then of
+# the best item by model score, then of the best item by rr, tab-separated.
+# Sorts kl in place.
+def o kl
+  scores = []
+  scores << kl.first.gold
+  kl.sort! { |i,j| j.model <=> i.model }
+  scores << kl.first.gold
+  kl.sort! { |i,j| j.rr <=> i.rr }
+  scores << kl.first.gold
+
+  puts scores.join "\t"
+end
+
+STDERR.write "reranking..\n"
+cur = []
+j = 0
+while line = STDIN.gets
+  item = KbestItem.new line.strip
+  item.rr = 0
+  # Each weight vector votes with the sign of its score, weighted by cs[k].
+  ws.each_with_index{ |w,k|
+    item.rr += sign(w.dot(item.f))*cs[k]
+  }
+  if item.rank == 0 && cur.size > 0
+    o cur
+    cur = []
+    j += 1
+  end
+  cur << item
+end
+# Nothing to print when STDIN was empty (o would fail on an empty list).
+o cur unless cur.empty?
+
-- 
cgit v1.2.3