summary refs log tree commit diff
path: root/rerank.rb
diff options
context:
space:
mode:
Diffstat (limited to 'rerank.rb')
-rwxr-xr-x rerank.rb 66
1 files changed, 18 insertions, 48 deletions
diff --git a/rerank.rb b/rerank.rb
index 900e0f2..9e2a708 100755
--- a/rerank.rb
+++ b/rerank.rb
@@ -3,72 +3,42 @@
require 'zipf'
class KbestItem
- attr_accessor :rank, :model, :gold, :f, :id
+ attr_accessor :rank, :model, :gold, :f, :model_orig
def initialize s
a = s.split "\t"
@rank = a[0].to_i
@gold = a[1].to_f
@model = a[2].to_f
+ @model_orig = @model
@f = SparseVector.from_kv a[3], "=", " "
- @id = -1
end
-end
-
-
-
-
-
-def dot v, w
- sum = 0.0
- v.each_with_index { |k,i|
- sum += k * w[i]
- }
-
- return sum
-end
-
-def elen v
- len = 0.0
- v.each { |i| len += i**2 }
- return Math.sqrt len
-end
-
-def norm v
- len = elen v
- return v.map { |i| i/len }
+ def to_s
+ return "#{@model}\t#{@gold}"
+ end
end
-STDERR.write "loading feature dict\n"
-fd = Marshal.load ReadFile.read ARGV[0]
-d = fd.size
-STDERR.write "#{d}\n"
-
-STDERR.write "loading model\n"
-w = Marshal.load ReadFile.read ARGV[1]
+`mkdir rrkb`
+w = SparseVector.from_kv ReadFile.new(ARGV[0]).read, "\t", "\n"
STDERR.write "reranking..\n"
-kbest_lists = []
cur = []
+k_sum = 0
+j = 0
while line = STDIN.gets
item = KbestItem.new line.strip
- x = [0.0] * d
- line.split("\t")[3].split.each { |i|
- k,v = i.split '=', 2
- x[fd[k]] = v.to_f
- }
- m = dot(w, norm(x))
- item.model = m
+ item.model = w.dot(item.f)
if item.rank == 0 && cur.size > 0
- kbest_lists << cur
+ cur.sort! { |i,j| j.model <=> i.model }
+ f = WriteFile.new "rrkb/#{j}.gz"
+ f.write cur.map{|x| x.to_s}.join("\n")
+ f.close
+ puts "RERANKED\t#{cur.first.gold}"
cur = []
+ j += 1
end
cur << item
end
-kbest_lists << cur
-
-kbest_lists.each { |l|
- puts "RERANKED\t#{l.sort { |i,j| j.model <=> i.model }.first.gold}"
-}
-
+cur.sort! { |i,j| j.model <=> i.model }
+puts "RERANKED\t#{cur.first.gold}"