diff options
| author | Patrick Simianer <p@simianer.de> | 2016-04-08 23:19:28 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2016-04-08 23:19:28 +0200 | 
| commit | 34d621606e58adeb35aa33f8503e105189a7c0d5 (patch) | |
| tree | 18931029e5ceed1e2c2d77bfdf2b87a216633366 | |
| parent | 81f6b20d1c41d8906900c57ab71ec08a007ea02c (diff) | |
| -rwxr-xr-x | fast_test.rb | 26 | ||||
| -rwxr-xr-x | rerank.rb | 2 | ||||
| -rwxr-xr-x | rerank_output_lists.rb | 48 | ||||
| -rwxr-xr-x | test_n_learn.rb | 32 | ||||
| -rwxr-xr-x | voted_rerank.rb | 67 | 
5 files changed, 174 insertions, 1 deletions
| diff --git a/fast_test.rb b/fast_test.rb new file mode 100755 index 0000000..23f7edf --- /dev/null +++ b/fast_test.rb @@ -0,0 +1,26 @@ +#!/usr/bin/env ruby + +require 'zipf' + +STDERR.write "testing...\n" +test = [] +test_f = ReadFile.new ARGV[0] +n = 0 +errors = 0 +w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n" +while i = test_f.gets +  x = SparseVector.from_kv(i.strip, '=', ' ') +  m = w.dot(x) +  if m <= 0.0 +    errors += 1 +    puts -1 +  else +    puts 1 +  end +  n += 1 +  STDERR.write "#{n}\n" if n%1000==0 +end +STDERR.write " test set size = #{n}\n" + +STDERR.write "accuracy = #{(n-errors)/n.to_f}\n" + @@ -18,7 +18,7 @@ class KbestItem    end  end -w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n" +w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, /\s/, "\n"  def o kl    scores = [] diff --git a/rerank_output_lists.rb b/rerank_output_lists.rb new file mode 100755 index 0000000..22bd9ca --- /dev/null +++ b/rerank_output_lists.rb @@ -0,0 +1,48 @@ +#!/usr/bin/env ruby + +require 'zipf' + +class KbestItem +  attr_accessor :rank, :model, :rr, :gold, :f +  def initialize s +    a = s.split "\t" +    @rank = a[0].to_i +    @gold = a[1].to_f +    @model = a[2].to_f +    @rr    = -1.0 +    @f = SparseVector.from_kv a[3], "=", " " +  end + +  def to_s +    return "#{@model}\t#{@gold}\t#{@rank}\t#{@rr}" +  end +end + +w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n" + +def o kl, j +  f = WriteFile.new "lists/#{j}.model" +  g = WriteFile.new "lists/#{j}.rr" +  kl.each { |i| +    f.write "#{i.gold}\t#{i.model}\n" +    g.write "#{i.gold}\t#{i.rr}\n" +  } +end + +`mkdir -p lists` +STDERR.write "reranking..\n" +cur = [] +k_sum = 0 +j = 0 +while line = STDIN.gets +  item = KbestItem.new line.strip +  item.rr = w.dot(item.f) +  if item.rank == 0 && cur.size > 0 +    o cur, j +    cur = [] +    j += 1 +  end +  cur << item +end +o cur, j + diff --git a/test_n_learn.rb b/test_n_learn.rb new file mode 100755 index 0000000..03c0cd4 --- /dev/null +++ b/test_n_learn.rb @@ -0,0 +1,32 @@ +#!/usr/bin/env ruby + +require 'zipf' + +STDERR.write "reading test data...\n" +test = [] +test_f = ReadFile.new ARGV[0] +threshold = ARGV[2].to_f +signals = ReadFile.new(ARGV[1]).readlines_strip.map{|i| (i.to_f)>=threshold} +n = 0 +while i = test_f.gets +  test << SparseVector.from_kv(i.strip, '=', ' ') +  n += 1 +  STDERR.write "#{n}\n" if n%1000==0 +end +STDERR.write " test set size = #{test.size}\n" + +errors = 0 +w = SparseVector.from_kv ReadFile.new(ARGV[1]).read, "\t", "\n" + +test.each { |x| +  m = w.dot(x) +  if m <= 0.0 +    errors += 1 +    puts -1 +  else +    puts 1 +  end +} + +STDERR.write "accuracy = #{(test.size-errors)/test.size.to_f}\n" + diff --git a/voted_rerank.rb b/voted_rerank.rb new file mode 100755 index 0000000..684825d --- /dev/null +++ b/voted_rerank.rb @@ -0,0 +1,67 @@ +#!/usr/bin/env ruby + +require 'zipf' + +class KbestItem +  attr_accessor :rank, :model, :rr, :gold, :f +  def initialize s +    a = s.split "\t" +    @rank = a[0].to_i +    @gold = a[1].to_f +    @model = a[2].to_f +    @rr    = -1.0 +    @f = SparseVector.from_kv a[3], "=", " " +  end + +  def to_s +    return "#{@model}\t#{@gold}" +  end +end + +ws = [] +cs = [] +ReadFile.readlines_strip(ARGV[0]).each { |l| +  c, s = l.split "\t" +  cs << c.to_i +  next if !s||s.strip=="" +  ws << SparseVector.from_kv(s, "=", " ") +} + +def sign(x) +  if x <= 0 +    return -1.0 +  else +    return 1.0 +  end +end + +def o kl +  scores = [] +  scores << kl.first.gold +  kl.sort! { |i,j| j.model <=> i.model } +  scores << kl.first.gold +  kl.sort! { |i,j| j.rr <=> i.rr } +  scores << kl.first.gold + +  puts scores.join "\t" +end + +STDERR.write "reranking..\n" +cur = [] +k_sum = 0 +j = 0 +while line = STDIN.gets +  item = KbestItem.new line.strip +  item.rr = 0 +  ws.each_with_index{ |w,j| +    item.rr += sign(w.dot(x))*cs[j] +  } +  if item.rank == 0 && cur.size > 0 +    o cur +    cur = [] +    j += 1 +  end +  cur << item +end +o cur + | 
