summaryrefslogtreecommitdiff
path: root/rerank.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2015-03-18 12:09:28 +0100
committerPatrick Simianer <p@simianer.de>2015-03-18 12:09:28 +0100
commit6ae893b1a83e1f38d2c72ff025fd2a1300919dbc (patch)
treeaba898d6500a90fd53681eaa48e5c3c6645ca39c /rerank.rb
init
Diffstat (limited to 'rerank.rb')
-rwxr-xr-xrerank.rb74
1 files changed, 74 insertions, 0 deletions
diff --git a/rerank.rb b/rerank.rb
new file mode 100755
index 0000000..900e0f2
--- /dev/null
+++ b/rerank.rb
@@ -0,0 +1,74 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+
# One entry of a k-best list, built from a single tab-separated line.
#
# Columns are read positionally: column 0 is stored as +rank+ (Integer),
# column 1 as +gold+ (Float), column 2 as +model+ (Float), and column 3 is
# handed to SparseVector.from_kv with "=" and " " as separators and kept
# as +f+ (presumably "name=value" pairs separated by spaces -- confirm
# against the zipf gem). Missing columns fall back to 0 / 0.0 because
# nil.to_i and nil.to_f do.
class KbestItem
  attr_accessor :rank, :model, :gold, :f, :id

  # s - String holding one tab-separated k-best line.
  def initialize(s)
    rank_col, gold_col, model_col, feature_col = s.split("\t")
    @rank  = rank_col.to_i
    @gold  = gold_col.to_f
    @model = model_col.to_f
    @f     = SparseVector.from_kv(feature_col, "=", " ")
    @id    = -1 # placeholder value; nothing in this class assigns another
  end
end
+
+
+
+
+
+
# Dot product of +v+ and +w+.
#
# Walks +v+ and multiplies each element by the entry of +w+ at the same
# index, so any entries of +w+ past the end of +v+ are never read. The
# accumulator starts at 0.0, so Integer and Float inputs produce a Float,
# and an empty +v+ gives 0.0.
def dot(v, w)
  v.each_with_index.inject(0.0) do |acc, (value, idx)|
    acc + value * w[idx]
  end
end
+
# Euclidean length of +v+: the square root of the sum of its squared
# elements. Returns 0.0 for an empty vector.
def elen(v)
  sum_of_squares = v.inject(0.0) { |acc, x| acc + x**2 }
  Math.sqrt(sum_of_squares)
end
+
# Scales +v+ to unit Euclidean length and returns the result as a new
# array; +v+ itself is not modified.
#
# A vector whose length is zero (all elements zero) cannot be scaled:
# dividing by its length would fill the result with NaN, and NaN scores
# cannot be ordered by <=>. Such a vector is returned as all 0.0 instead.
def norm(v)
  len = elen(v)
  return v.map { 0.0 } if len.zero?

  v.map { |i| i / len }
end
+
# Main script.
#
#   ARGV[0]  marshalled feature dictionary; used below as a map from
#            feature name to an index into the dense feature vector
#   ARGV[1]  marshalled model; passed to dot() as the weight vector
#   STDIN    k-best entries, one tab-separated line each; an entry whose
#            rank is 0 starts a new list
#
# For every k-best list, prints "RERANKED\t<gold>" for the entry with the
# highest model score.

# Builds a dense vector of length +dim+ from a features field of
# whitespace-separated "name=value" pairs. +fd+ maps a feature name to its
# position. Names missing from +fd+ have no position in the vector and are
# skipped; a nil field yields an all-zero vector.
def dense_features(field, fd, dim)
  x = Array.new(dim, 0.0)
  field.to_s.split.each do |pair|
    name, value = pair.split('=', 2)
    idx = fd[name]
    next if idx.nil?

    x[idx] = value.to_f
  end
  x
end

STDERR.write "loading feature dict\n"
# NOTE(review): Marshal.load can instantiate arbitrary objects -- only pass
# files from a trusted source here.
fd = Marshal.load(ReadFile.read(ARGV[0]))
d = fd.size
STDERR.write "#{d}\n"

STDERR.write "loading model\n"
w = Marshal.load(ReadFile.read(ARGV[1]))

STDERR.write "reranking..\n"
kbest_lists = []
cur = []
STDIN.each_line do |line|
  stripped = line.strip
  next if stripped.empty? # tolerate blank lines, e.g. a trailing newline

  item = KbestItem.new(stripped)
  x = dense_features(stripped.split("\t")[3], fd, d)
  score = dot(w, norm(x))
  # An all-zero feature vector can normalise to NaN, and NaN cannot be
  # ordered; score such entries as 0.0.
  item.model = score.nan? ? 0.0 : score

  # Rank 0 marks the first entry of the next list.
  if item.rank == 0 && !cur.empty?
    kbest_lists << cur
    cur = []
  end
  cur << item
end
# Flush the last list; with empty input there is nothing to flush.
kbest_lists << cur unless cur.empty?

kbest_lists.each do |list|
  # On ties, max_by keeps the first (earliest-read) entry.
  best = list.max_by(&:model)
  puts "RERANKED\t#{best.gold}"
end
+
+