summaryrefslogtreecommitdiff
path: root/lib/zipf/Translation.rb
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-16 17:44:07 +0200
committerPatrick Simianer <p@simianer.de>2014-06-16 17:44:07 +0200
commit4059a5d048cb0f72872c98073ef1ce120a30d78c (patch)
tree4fbff0dc62c5ef3deea0ffdec578e3f2c0ed74b6 /lib/zipf/Translation.rb
parent912ff6aebcf4f89f9e64b5f59956dbf7d8f624e3 (diff)
renaming to zipf
Diffstat (limited to 'lib/zipf/Translation.rb')
-rw-r--r--lib/zipf/Translation.rb72
1 files changed, 72 insertions, 0 deletions
diff --git a/lib/zipf/Translation.rb b/lib/zipf/Translation.rb
new file mode 100644
index 0000000..3759a1d
--- /dev/null
+++ b/lib/zipf/Translation.rb
@@ -0,0 +1,72 @@
# One hypothesis line of a k-best list.
#
# Attributes:
#   id     - integer parsed from the first field of the line
#   raw    - second field of the line, whitespace-stripped
#   s      - surface string: raw itself, or raw with alignment markers removed
#   f      - whatever SparseVector.from_kv returns for the third field
#   scores - Hash of scores; from_s stores the fourth field under :decoder
#   rank   - caller-supplied rank (never derived from the line itself)
class Translation
  attr_accessor :id, :s, :raw, :f, :scores, :rank

  # Matches markers such as "|0-2|" (plus leading whitespace) or "|3|" / "|-1|"
  # (plus trailing whitespace) embedded in the translation string.
  ALIGNMENT_MARKER = /\s*\|\d+-\d+\||\|-?\d+\|\s*/

  def initialize id=nil, raw=nil, s=nil, f=nil, scores={}, rank=nil
    @id = id
    @raw = raw
    @s = s
    @f = f
    @scores = scores
    @rank = rank
  end

  # Fills this object from one k-best line.
  #
  # t               - the line; it is split into id, translation, features and
  #                   score by the external helper splitpipe
  # strip_alignment - when true, alignment markers are removed from the
  #                   translation before it is stored in s
  # rank            - stored as-is in rank
  #
  # Returns rank.
  def from_s t, strip_alignment=true, rank=nil
    id, raw, features, score = splitpipe(t, 3)
    raw.strip!
    @raw = raw
    if strip_alignment # the way moses does it
      # Every marker becomes a single space, then whitespace runs are collapsed.
      @s = @raw.gsub(ALIGNMENT_MARKER, ' ').gsub(/\s+/, ' ')
      @s.strip!
    else
      @s = raw
    end
    @id = id.to_i
    @f = SparseVector.from_kv features
    @scores[:decoder] = score.to_f
    @rank = rank
  end

  # Builds a new instance from one k-best line. strip_alignment and rank are
  # forwarded to the instance method #from_s and default to the same values.
  def self.from_s s, strip_alignment=true, rank=nil
    t = new
    t.from_s s, strip_alignment, rank
    t
  end

  # Serializes to "id ||| s ||| features ||| decoder score"; the features
  # field is left out when include_features is false.
  def to_s include_features=true
    # A single if/else so that the chosen branch is the method's value; two
    # consecutive modifier-ifs would return nil whenever the first one fired.
    fields = if include_features
      [@id, @s, @f.to_kv('=', ' '), @scores[:decoder]]
    else
      [@id, @s, @scores[:decoder]]
    end
    fields.join ' ||| '
  end

  # Serializes to "rank ||| s ||| <score> ||| <scores hash>".
  #
  # NOTE(review): @score is never assigned anywhere in this class, so the third
  # field is always empty. Possibly @scores[:decoder] was intended; confirm
  # before changing, since the output format would change.
  def to_s2
    [@rank, @s, @score, @scores.to_s].join ' ||| '
  end
end

# Reads a file of k-best entries and groups consecutive lines that share the
# same leading id into one list each.
#
# fn               - passed to ReadFile.new
# translation_type - class used for each line; it must respond to new, and its
#                    instances to from_s(line) and id=
#
# Every entry's id is overwritten with its 0-based position inside its own
# list (the id parsed by from_s is not kept).
#
# Returns an Array of Arrays of translation_type instances, in file order.
# An input without any lines yields [].
def read_kbest_lists fn, translation_type=Translation
  kbest_lists = []
  cur = []
  prev = nil
  f = ReadFile.new fn
  begin
    while (line = f.gets)
      t = translation_type.new
      t.from_s line
      seg = splitpipe(line)[0].to_i
      if seg != prev
        # A new leading id starts a new list; flush the finished one first.
        kbest_lists << cur unless cur.empty?
        cur = []
        prev = seg
      end
      t.id = cur.size
      cur << t
    end
  ensure
    # Release the handle even when parsing a line raises.
    f.close
  end
  kbest_lists << cur unless cur.empty? # last one
  kbest_lists
end
