From c0daa3e70cc3187f04f67c2cdc0bd3b3217e8aa6 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 14 Feb 2014 17:14:49 +0100 Subject: => 0.3; License and README updated; some from_* methods for SparseVector; ttable.rb => Translation.rb; moved some misc. stuff to misc.rb; monkey patched String --- lib/nlp_ruby/PriorityQueue.rb | 37 ------- lib/nlp_ruby/SparseVector.rb | 71 +++++++------- lib/nlp_ruby/Translation.rb | 66 +++++++++++++ lib/nlp_ruby/bleu.rb | 12 +-- lib/nlp_ruby/cdec.rb | 20 ---- lib/nlp_ruby/dag.rb | 205 +++++++++++++++++++++++++++++++++++++++ lib/nlp_ruby/dags.rb | 218 ------------------------------------------ lib/nlp_ruby/misc.rb | 74 ++++++++++++++ lib/nlp_ruby/semirings.rb | 3 +- lib/nlp_ruby/stringutil.rb | 41 +------- lib/nlp_ruby/ttable.rb | 85 ---------------- 11 files changed, 394 insertions(+), 438 deletions(-) delete mode 100644 lib/nlp_ruby/PriorityQueue.rb create mode 100644 lib/nlp_ruby/Translation.rb delete mode 100644 lib/nlp_ruby/cdec.rb create mode 100644 lib/nlp_ruby/dag.rb delete mode 100644 lib/nlp_ruby/dags.rb delete mode 100644 lib/nlp_ruby/ttable.rb (limited to 'lib/nlp_ruby') diff --git a/lib/nlp_ruby/PriorityQueue.rb b/lib/nlp_ruby/PriorityQueue.rb deleted file mode 100644 index f090e60..0000000 --- a/lib/nlp_ruby/PriorityQueue.rb +++ /dev/null @@ -1,37 +0,0 @@ -# FIXME dags -# this assumes that elements in the queue -# have a numerical member named 'score' -class PriorityQueue - - def initialize a=Array.new - @queue = Array.new a - sort! - end - - def sort! - @queue.sort_by! { |i| -i.score } - end - - def pop - @queue.pop - end - - def push i - @queue << i - sort! - end - - def empty? - @queue.empty? - end - - # FIXME - def to_s - a = [] - @queue.each { |i| - a << "#{i.to_s}[#{i.score}]" - } - "[#{a.join ', '}]" - end -end - diff --git a/lib/nlp_ruby/SparseVector.rb b/lib/nlp_ruby/SparseVector.rb index 1c0262b..b80373c 100644 --- a/lib/nlp_ruby/SparseVector.rb +++ b/lib/nlp_ruby/SparseVector.rb @@ -20,6 +20,34 @@ class SparseVector < Hash from_h eval(s) end + def to_kv sep='=', join=' ' + a = [] + self.each_pair { |k,v| + a << "#{k}#{sep}#{v}" + } + return a.join join + end + + def from_kv s + s.split.each { |i| + k,v = i.split('=') + self[k] = v.to_f + } + end + + def from_file fn, sep='=' + f = ReadFile.new(fn) + while line = f.gets + key, value = line.strip.split sep + value = value.to_f + self[key] = value + end + end + + def join_keys other + self.keys + other.keys + end + def sum self.values.inject(:+) end @@ -74,38 +102,6 @@ class SparseVector < Hash return Math.sqrt(sum) end - # FIXME - def from_kv_file fn, sep=' ' - f = ReadFile.new(fn) - while line = f.gets - key, value = line.strip.split sep - value = value.to_f - self[key] = value - end - end - - # FIXME - def to_kv sep='=' - a = [] - self.each_pair { |k,v| - a << "#{k}#{sep}#{v}" - } - return a.join ' ' - end - - # FIXME - def to_kv2 sep='=' - a = [] - self.each_pair { |k,v| - a << "#{k}#{sep}#{v}" - } - return a.join "\n" - end - - def join_keys other - self.keys + other.keys - end - def + other new = SparseVector.new join_keys(other).each { |k| @@ -132,9 +128,13 @@ class SparseVector < Hash end end -def mean_sparse_vector array_of_vectors + +module SparseVector + + +def SparseVector::mean a mean = SparseVector.new - array_of_vectors.each { |i| + a.each { |i| i.each_pair { |k,v| mean[k] += v } @@ -144,3 +144,6 @@ def mean_sparse_vector array_of_vectors return mean end + +end + diff --git a/lib/nlp_ruby/Translation.rb b/lib/nlp_ruby/Translation.rb new file mode 100644 index 0000000..0c346a4 --- /dev/null +++ b/lib/nlp_ruby/Translation.rb @@ -0,0 +1,66 @@ +class Translation + attr_accessor :id, :s, :raw, :f, :score, :rank, :other_score + + def initialize id=nil, raw=nil, s=nil, f=nil, scores={}, rank=nil + @id = id + @raw = raw + @s = s + @f = f + @scores = scores + @rank = rank + end + + def from_s t, strip_alignment=true, rank=nil + id, raw, features, score = splitpipe(t, 3) + raw.strip! + @raw = raw + if strip_alignment # the way moses does it + @s = @raw.gsub(/\s*\|\d+-\d+\||\|-?\d+\|\s*/, ' ').gsub(/\s+/, ' ') + @s.strip! + else + @s = raw + end + @id = id.to_i + @f = read_feature_string features + @scores['decoder'] = score.to_f + @rank = rank + end + + def to_s include_features=true + [@id, @s, @f.to_kv('=', ' '), @scores['decoder']].join(' ||| ') if include_features + [@id, @s, @scores['decoder']].join(' ||| ') if !include_features + end + + def to_s2 + [@rank, @s, @score, @scores.to_s].join ' ||| ' + end +end + +def read_kbest_lists fn, translation_type=Translation + kbest_lists = [] + cur = [] + f = ReadFile.new fn + prev = -1 + c = 0 + id = 0 + while line = f.gets + t = translation_type.new + t.from_s line + c = splitpipe(line)[0].to_i + if c != prev + if cur.size > 0 + kbest_lists << cur + cur = [] + end + prev = c + id = 0 + end + t.id = id + cur << t + id += 1 + end + kbest_lists << cur # last one + f.close + return kbest_lists +end + diff --git a/lib/nlp_ruby/bleu.rb b/lib/nlp_ruby/bleu.rb index ee91985..d7a6b2b 100644 --- a/lib/nlp_ruby/bleu.rb +++ b/lib/nlp_ruby/bleu.rb @@ -79,9 +79,9 @@ def BLEU::get_counts hypothesis, reference, n, times=1 return p end -def BLEU::brevity_penalty c, r +def BLEU::brevity_penalty c, r, hack=0.0 return 1.0 if c>r - return Math.exp(1-r/c) + return Math.exp 1.0-((r+hack)/c) end def BLEU::bleu counts, n, debug=false @@ -105,7 +105,7 @@ def BLEU::hbleu counts, n, debug=false (100*bleu(counts, n, debug)).round(3) end -def BLEU::per_sentence_bleu hypothesis, reference, n=4 +def BLEU::per_sentence_bleu hypothesis, reference, n=4, hack=0.0 h_ng = {}; r_ng = {} (1).upto(n) {|i| h_ng[i] = []; r_ng[i] = []} ngrams(hypothesis, n) {|i| h_ng[i.size] << i} @@ -117,13 +117,13 @@ def BLEU::per_sentence_bleu hypothesis, reference, n=4 (1).upto(m) { |i| counts_clipped = 0 counts_sum = h_ng[i].size - h_ng[i].uniq.each {|j| counts_clipped += r_ng[i].count(j)} + h_ng[i].uniq.each { |j| counts_clipped += r_ng[i].count(j) } add = 1.0 if i >= 2 sum += weight * Math.log((counts_clipped + add)/(counts_sum + add)); - } + } return brevity_penalty(hypothesis.strip.split.size, reference.strip.split.size) * Math.exp(sum) end -end +end # module diff --git a/lib/nlp_ruby/cdec.rb b/lib/nlp_ruby/cdec.rb deleted file mode 100644 index 1080f14..0000000 --- a/lib/nlp_ruby/cdec.rb +++ /dev/null @@ -1,20 +0,0 @@ -module CDEC - -require 'open3' - - -# FIXME -CDEC_BINARY = "/toolbox/cdec-dtrain/decoder/cdec" - - -def CDEC::kbest input, ini, weights, k, unique=true - o, s = Open3.capture2 "echo \"#{input}\" | #{CDEC_BINARY} -c #{ini} -w #{weights} -k #{k} -r 2>/dev/null" - j = -1 - ret = [] - o.split("\n").map{|i| j+=1; t=Translation.new; t.from_s(i, false, j); ret << t} - return ret -end - - -end - diff --git a/lib/nlp_ruby/dag.rb b/lib/nlp_ruby/dag.rb new file mode 100644 index 0000000..cca35c5 --- /dev/null +++ b/lib/nlp_ruby/dag.rb @@ -0,0 +1,205 @@ +module DAG + +require 'json' + + +class DAG::Node + attr_accessor :label, :edges, :incoming, :score, :mark + + def initialize label=nil, edges=[], incoming=[], score=nil + @label = label + @edges = edges # outgoing + @incoming = incoming + @score = nil + end + + def add_edge head, weight=0 + exit if self==head # no self-cycles! + @edges << DAG::Edge.new(self, head, weight) + return @edges.last + end + + def to_s + "DAG::Node" + end + + def repr + "#{to_s} #{@score} out:#{@edges} in:[#{@incoming.map{|e| e.to_s}.join ', '}]" + end +end + +class DAG::Edge + attr_accessor :tail, :head, :weight, :mark + + def initialize tail=nil, head=nil, weight=0 + @tail = tail + @head = head + @weight = weight + @mark = false # did we already follow this edge? -- for topological sorting + end + + def to_s + s = "DAG::Edge<#{@tail} ->[#{weight}] #{@head}" + s += " x" if @mark + s += ">" + s + end +end + +# depth-first search +# w/o markings as we do not have cycles +def DAG::dfs n, target_label + return n if n.label==target_label # assumes uniq labels! + stack = n.edges.map { |i| i.head } + while !stack.empty? + m = stack.pop + return DAG::dfs m, target_label + end + return nil +end + +# breadth-first search +# w/o markings as we do not have cycles +def DAG::bfs n, target_label + queue = [n] + while !queue.empty? + m = queue.shift + return m if m.label==target_label + m.edges.each { |e| queue << e.head } + end + return nil +end + +# topological sort +def DAG::topological_sort graph + sorted = [] + s = graph.reject { |n| !n.incoming.empty? } + while !s.empty? + sorted << s.shift + sorted.last.edges.each { |e| + e.mark = true + s << e.head if e.head.incoming.reject{|f| f.mark}.empty? + } + end + return sorted +end + +# initialize graph scores with semiring One +def DAG::init graph, semiring, source_node + graph.each {|n| n.score=semiring.null} + source_node.score = semiring.one +end + +# viterbi +def DAG::viterbi graph, semiring=ViterbiSemiring, source_node + toposorted = DAG::topological_sort(graph) + DAG::init(graph, semiring, source_node) + toposorted.each { |n| + n.incoming.each { |e| + # update + n.score = \ + semiring.add.call(n.score, \ + semiring.multiply.call(e.tail.score, e.weight) + ) + } + } +end + +# forward viterbi +def DAG::viterbi_forward graph, semiring=ViterbiSemiring, source_node + toposorted = DAG::topological_sort(graph) + DAG::init(graph, semiring, source_node) + toposorted.each { |n| + n.edges.each { |e| + e.head.score = \ + semiring.add.call(e.head.score, \ + semiring.multiply.call(n.score, e.weight) + ) + } + } +end + +# Dijkstra algorithm +# for A*-search we would need an optimistic estimate of +# future cost at each node +def DAG::dijkstra graph, semiring=RealSemiring.new, source_node + DAG::init(graph, semiring, source_node) + q = PriorityQueue.new graph + while !q.empty? + n = q.pop + n.edges.each { |e| + e.head.score = \ + semiring.add.call(e.head.score, \ + semiring.multiply.call(n.score, e.weight)) + q.sort! + } + end +end + +# Bellman-Ford algorithm +def DAG::bellman_ford(graph, semiring=RealSemiring.new, source_node) + DAG::init(graph, semiring, source_node) + edges = [] + graph.each { |n| edges |= n.edges } + # relax edges + (graph.size-1).times{ |i| + edges.each { |e| + e.head.score = \ + semiring.add.call(e.head.score, \ + semiring.multiply.call(e.tail.score, e.weight)) + } + } + # we do not allow cycles (negative or positive) +end + +# Floyd algorithm +def DAG::floyd(graph, semiring=nil) + dist_matrix = [] + graph.each_index { |i| + dist_matrix << [] + graph.each_index { |j| + val = 1.0/0.0 + val = 0.0 if i==j + dist_matrix.last << val + } + } + edges = [] + graph.each { |n| edges |= n.edges } + edges.each { |e| + dist_matrix[graph.index(e.tail)][graph.index(e.head)] = e.weight + } + 0.upto(graph.size-1) { |k| + 0.upto(graph.size-1) { |i| + 0.upto(graph.size-1) { |j| + if dist_matrix[i][k] + dist_matrix[k][j] < dist_matrix[i][j] + dist_matrix [i][j] = dist_matrix[i][k] + dist_matrix[k][j] + end + } + } + } + return dist_matrix +end + + +# returns a list of nodes (graph) and a hash for finding +# nodes by their label (these need to be unique!) +def DAG::read_graph_from_json fn, semiring=RealSemiring.new + graph = [] + nodes_by_label = {} + h = JSON.parse File.new(fn).read + h['nodes'].each { |i| + n = DAG::Node.new i['label'] + graph << n + nodes_by_label[n.label] = n + } + h['edges'].each { |i| + n = nodes_by_label[i['tail']] + a = n.add_edge(nodes_by_label[i['head']], semiring.convert.call(i['weight'].to_f)) + nodes_by_label[i['head']].incoming << a + } + return graph, nodes_by_label +end + + +end # module + diff --git a/lib/nlp_ruby/dags.rb b/lib/nlp_ruby/dags.rb deleted file mode 100644 index 7767be1..0000000 --- a/lib/nlp_ruby/dags.rb +++ /dev/null @@ -1,218 +0,0 @@ -########################### -# TODO -# output paths -# visualization? -# algorithms: -# beam search -# best-first -# kbest -# kruskal (MST)? -# transitive closure? -########################### - -require 'json' - - -module DAG - - -class DAG::Node - attr_accessor :label, :edges, :incoming, :score, :mark - - def initialize label=nil, edges=[], incoming=[], score=nil - @label = label - @edges = edges # outgoing - @incoming = incoming - @score = nil - end - - def add_edge head, weight=0 - exit if self==head # no self-cycles! - @edges << DAG::Edge.new(self, head, weight) - return @edges.last - end - - def to_s - "DAG::Node" - end - - def repr - "#{to_s} #{@score} out:#{@edges} in:[#{@incoming.map{|e| e.to_s}.join ', '}]" - end -end - -class DAG::Edge - attr_accessor :tail, :head, :weight, :mark - - def initialize tail=nil, head=nil, weight=0 - @tail = tail - @head = head - @weight = weight - @mark = false # did we already follow this edge? -- for topological sorting - end - - def to_s - s = "DAG::Edge<#{@tail} ->[#{weight}] #{@head}" - s += " x" if @mark - s += ">" - s - end -end - -# depth-first search -# w/o markings as we do not have cycles -def DAG::dfs n, target_label - return n if n.label==target_label # assumes uniq labels! - stack = n.edges.map { |i| i.head } - while !stack.empty? - m = stack.pop - return DAG::dfs m, target_label - end - return nil -end - -# breadth-first search -# w/o markings as we do not have cycles -def DAG::bfs n, target_label - queue = [n] - while !queue.empty? - m = queue.shift - return m if m.label==target_label - m.edges.each { |e| queue << e.head } - end - return nil -end - -# topological sort -def DAG::topological_sort graph - sorted = [] - s = graph.reject { |n| !n.incoming.empty? } - while !s.empty? - sorted << s.shift - sorted.last.edges.each { |e| - e.mark = true - s << e.head if e.head.incoming.reject{|f| f.mark}.empty? - } - end - return sorted -end - -# initialize graph scores with semiring One -def DAG::init graph, semiring, source_node - graph.each {|n| n.score=semiring.null} - source_node.score = semiring.one -end - -# viterbi -def DAG::viterbi graph, semiring=ViterbiSemiring, source_node - toposorted = DAG::topological_sort(graph) - DAG::init(graph, semiring, source_node) - toposorted.each { |n| - n.incoming.each { |e| - # update - n.score = \ - semiring.add.call(n.score, \ - semiring.multiply.call(e.tail.score, e.weight) - ) - } - } -end - -# forward viterbi -def DAG::viterbi_forward graph, semiring=ViterbiSemiring, source_node - toposorted = DAG::topological_sort(graph) - DAG::init(graph, semiring, source_node) - toposorted.each { |n| - n.edges.each { |e| - e.head.score = \ - semiring.add.call(e.head.score, \ - semiring.multiply.call(n.score, e.weight) - ) - } - } -end - -# Dijkstra algorithm -# for A*-search we would need an optimistic estimate of -# future cost at each node -def DAG::dijkstra graph, semiring=RealSemiring.new, source_node - DAG::init(graph, semiring, source_node) - q = PriorityQueue.new graph - while !q.empty? - n = q.pop - n.edges.each { |e| - e.head.score = \ - semiring.add.call(e.head.score, \ - semiring.multiply.call(n.score, e.weight)) - q.sort! - } - end -end - -# Bellman-Ford algorithm -def DAG::bellman_ford(graph, semiring=RealSemiring.new, source_node) - DAG::init(graph, semiring, source_node) - edges = [] - graph.each { |n| edges |= n.edges } - # relax edges - (graph.size-1).times{ |i| - edges.each { |e| - e.head.score = \ - semiring.add.call(e.head.score, \ - semiring.multiply.call(e.tail.score, e.weight)) - } - } - # we do not allow cycles (negative or positive) -end - -# Floyd algorithm -def DAG::floyd(graph, semiring=nil) - dist_matrix = [] - graph.each_index { |i| - dist_matrix << [] - graph.each_index { |j| - val = 1.0/0.0 - val = 0.0 if i==j - dist_matrix.last << val - } - } - edges = [] - graph.each { |n| edges |= n.edges } - edges.each { |e| - dist_matrix[graph.index(e.tail)][graph.index(e.head)] = e.weight - } - 0.upto(graph.size-1) { |k| - 0.upto(graph.size-1) { |i| - 0.upto(graph.size-1) { |j| - if dist_matrix[i][k] + dist_matrix[k][j] < dist_matrix[i][j] - dist_matrix [i][j] = dist_matrix[i][k] + dist_matrix[k][j] - end - } - } - } - return dist_matrix -end - - -# returns a list of nodes (graph) and a hash for finding -# nodes by their label (these need to be unique!) -def DAG::read_graph_from_json fn, semiring=RealSemiring.new - graph = [] - nodes_by_label = {} - h = JSON.parse File.new(fn).read - h['nodes'].each { |i| - n = DAG::Node.new i['label'] - graph << n - nodes_by_label[n.label] = n - } - h['edges'].each { |i| - n = nodes_by_label[i['tail']] - a = n.add_edge(nodes_by_label[i['head']], semiring.convert.call(i['weight'].to_f)) - nodes_by_label[i['head']].incoming << a - } - return graph, nodes_by_label -end - - -end # module - diff --git a/lib/nlp_ruby/misc.rb b/lib/nlp_ruby/misc.rb index 80d932c..0f58100 100644 --- a/lib/nlp_ruby/misc.rb +++ b/lib/nlp_ruby/misc.rb @@ -21,6 +21,40 @@ class Array end end +class String + + def downcase? s + s[/[[:lower:]]/] + end +end + +class PriorityQueue +# This assumes that elements in the queue +# have a numerical member named 'score'. + + def initialize a=Array.new + @queue = Array.new a + sort! + end + + def sort! + @queue.sort_by! { |i| -i.score } + end + + def pop + @queue.pop + end + + def push i + @queue << i + sort! + end + + def empty? + @queue.empty? + end +end + def spawn_with_timeout cmd, t=4, debug=false require 'timeout' STDERR.write cmd+"\n" if debug @@ -37,4 +71,44 @@ def spawn_with_timeout cmd, t=4, debug=false return pipe_in.read end +def read_phrase_table fn + table = {} + f = ReadFile.new fn + while raw_rule = f.gets + french, english, features = splitpipe(raw_rule) + feature_map = read_feature_string(features) + if table.has_key? french + table[french] << [english, feature_map ] + else + table[french] = [[english, feature_map]] + end + end + f.close + return table +end + +def cdec_kbest cdec_bin, input, ini, weights, k, unique=true + require 'open3' + cmd = "echo \"#{input}\" | #{cdec_bin} -c #{ini} -w #{weights} -k #{k}" + cmd += " -r" if unique + o,_ = Open3.capture2 "#{cmd} 2>/dev/null" + a = []; j = -1 + o.split("\n").map{ |i| j+=1; t=Translation.new; t.from_s(i, false, j); a << t } + return a +end + +def read_config fn + f = ReadFile.new fn + cfg = {} + while line = f.gets + line.strip! + next if /^\s*$/.match line + next if line[0]=='#' + content = line.split('#', 2).first + k, v = content.split(/\s*=\s*/, 2) + k.strip!; v.strip! + cfg[k] = v + end + return cfg +end diff --git a/lib/nlp_ruby/semirings.rb b/lib/nlp_ruby/semirings.rb index a06f151..83551a9 100644 --- a/lib/nlp_ruby/semirings.rb +++ b/lib/nlp_ruby/semirings.rb @@ -1,4 +1,5 @@ -# semirings for graphs as described in +# Semirings for directed acyclic graphs (dags) (also directed hypergraphs), +# as described in: # 'Dynamic Programming Algorithms in # Semiring and Hypergraph Frameworks' (Liang Huang) class Semiring diff --git a/lib/nlp_ruby/stringutil.rb b/lib/nlp_ruby/stringutil.rb index d7381bb..aa9be00 100644 --- a/lib/nlp_ruby/stringutil.rb +++ b/lib/nlp_ruby/stringutil.rb @@ -1,17 +1,7 @@ -# whitespace 'tokenizer' def tokenize s s.strip.split end -def splitpipe s, n=3 - s.strip.split("|"*n) -end - -def downcase? s - s[/[[:lower:]]/] -end - -# iterator over n-grams def ngrams(s, n, fix=false) a = tokenize s a.each_with_index { |tok, i| @@ -22,34 +12,11 @@ def ngrams(s, n, fix=false) } end -# a=1.0 b=2.0 => { 'a' => 1.0, 'b' => 2.0 } -def read_feature_string s - map = SparseVector.new - tokenize(s).each { |i| - key, value = i.split '=' - map[key] = value.to_f - } - return map -end - - -def read_cfg fn - f = ReadFile.new fn - cfg = {} - while line = f.gets - line.strip! - next if /^\s*$/.match line - next if line[0]=='#' - content = line.split('#', 2).first - k, v = content.split(/\s*=\s*/, 2) - k.strip!; v.strip! - cfg[k] = v - end - return cfg -end - def bag_of_words s, stopwords=[] s.strip.split.uniq.sort.reject{ |w| stopwords.include? w } -end +end +def splitpipe s, n=3 + s.strip.split("|"*n) +end diff --git a/lib/nlp_ruby/ttable.rb b/lib/nlp_ruby/ttable.rb deleted file mode 100644 index c0f37be..0000000 --- a/lib/nlp_ruby/ttable.rb +++ /dev/null @@ -1,85 +0,0 @@ -# table['some French string'] = [Array of English strings] -def read_phrase_table fn - table = {} - f = ReadFile.new fn - while raw_rule = f.gets - french, english, features = splitpipe(raw_rule) - feature_map = read_feature_string(features) - if table.has_key? french - table[french] << [english, feature_map ] - else - table[french] = [[english, feature_map]] - end - end - f.close - return table -end - -# FIXME -class Translation - attr_accessor :id, :s, :raw, :f, :score, :rank, :other_score - - def initialize id=nil, raw=nil, s=nil, f=nil, score=nil, rank=nil, other_score=nil - @id = id - @raw = raw - @s = s - @f = f - @score = score - @rank = rank - @other_score = other_score - end - - def from_s t, strip_alignment=true, rank=nil - id, raw, features, score = splitpipe(t, 3) - raw.strip! - @raw = raw - if strip_alignment # the way moses does it - @s = @raw.gsub(/\s*\|\d+-\d+\||\|-?\d+\|\s*/, ' ').gsub(/\s+/, ' ') - @s.strip! - else - @s = raw - end - @id = id.to_i - @f = read_feature_string features - @score = score.to_f - @rank = rank - @other_score = nil - end - - def to_s - [@id, @s, @f.to_kv, @score].join ' ||| ' - end - - def to_s2 - [@rank, @s, @score, @other_score].join ' ||| ' - end -end - -def read_kbest_lists fn, translation_type=Translation - kbest_lists = [] - cur = [] - f = ReadFile.new fn - prev = -1 - c = 0 - id = 0 - while line = f.gets - t = translation_type.new - t.from_s line - c = splitpipe(line)[0].to_i - if c != prev - if cur.size > 0 - kbest_lists << cur - cur = [] - end - prev = c - id = 0 - end - t.id = id - cur << t - id += 1 - end - kbest_lists << cur # last one - f.close - return kbest_lists -end - -- cgit v1.2.3