#!/usr/bin/env ruby

require 'zipf'
require 'trollop'

# Array-backed stack of hypotheses. After `sort!` the elements are in
# ascending score order, so `pop` removes the highest-scoring hypothesis.
class OrderedHypothesisStack < Array
  # Sorts the stack in place by hypothesis score (ascending).
  def sort!
    sort_by!(&:score)
  end
end

# A translation rule: left-hand side, right-hand side and a feature map.
class Rule
  attr_accessor :left, :right, :features

  def initialize left, right, features
    @left = left
    @right = right
    @features = features
  end
end

# Tracks which input positions are already covered by a hypothesis.
# A position is covered when its slot in the backing array is truthy.
class Coverage
  # a: array with one slot per input position (nil = not covered).
  def initialize a
    @_a = a
    @_size = nil
  end

  # Marks every position in the inclusive span tuple = [first, last] as covered.
  def set tuple
    tuple[0].upto(tuple[1]) { |i| @_a[i] = true }
    # The memoized count is no longer valid once the array changes.
    @_size = nil
  end

  # Returns an independent Coverage whose slots are normalized to true/nil.
  def clone
    Coverage.new(@_a.map { |slot| slot ? true : nil })
  end

  # String form: the backing array (as text) and the number of covered slots.
  def to_s
    [@_a.to_s, size].to_s
  end

  # True when no position in the inclusive span tuple = [first, last] is covered.
  def free? tuple
    (tuple[0]..tuple[1]).none? { |i| @_a[i] }
  end

  # Number of covered positions; memoized until the next `set`.
  def size
    @_size ||= @_a.count { |slot| slot }
  end
end

# A (partial) translation: the option applied last, the accumulated score,
# the resulting coverage, a back-pointer to the hypothesis it extends, and
# the input span the option was applied to.
class Hypothesis
  attr_accessor :rule, :score, :coverage, :previous, :span, :final

  def initialize rule, score, coverage, previous, span
    @rule = rule
    @score = score
    @coverage = coverage
    @previous = previous
    @span = span
    @final = false # set once the hypothesis has been printed
  end

  def to_s
    [@score, @rule, @coverage.to_s, "TODO", span.to_s].to_s
  end

  # True when every position of the given span is still uncovered.
  def applicable? span
    @coverage.free? span
  end
end

# Builds a map from every contiguous input span [i, j] to its list of
# translation options.
#
# Single-token spans always get a pass-through option (the token itself with
# an 'f2e' score of -1.0); longer spans start empty. Entries found in
# phrase_table under the span's space-joined source string are appended.
#
# tokenized_input: array of tokens.
# phrase_table:    hash from source phrase string to an enumerable of options;
#                  main reads each option as [target_string, {'f2e' => score}].
# Returns a Hash { [i, j] => [option, ...] }.
def get_translation_options tokenized_input, phrase_table
  translation_options = {}
  last = tokenized_input.size - 1
  tokenized_input.each_index do |i|
    i.upto(last) do |j|
      source = tokenized_input[i..j].join ' '
      options = i == j ? [[source, {'f2e'=>-1.0}]] : []
      if phrase_table.has_key? source
        phrase_table[source].each { |right_side| options << right_side }
      end
      translation_options[[i, j]] = options
    end
  end
  translation_options
end

# Renders the output line for a complete hypothesis by following its
# back-pointers down to the initial hypothesis (the one whose rule is nil).
#
# Target tokens are written into an array with one slot per input position,
# starting at the first position of the span each option was applied to; the
# last token written for an option gets a " |" marker appended.
#
# Returns "<score> <tokens>|| <scores along the back-pointer chain>\n".
def format_derivation hyp, input_size
  tokens = Array.new(input_size) { '' }
  scores = []
  cur = hyp
  while cur.rule
    pos = cur.span[0]
    scores << cur.score
    cur.rule.split.each do |tok|
      tokens[pos] = tok
      pos += 1
    end
    tokens[pos - 1] += " |"
    cur = cur.previous
  end
  "#{hyp.score} #{tokens.join(' ')}|| #{scores}\n"
end

# Entry point: reads one input sentence (or uses a fixed one in debug mode),
# then searches over hypotheses with a score-ordered, size-limited stack and
# prints every complete hypothesis that is popped.
def main
  cfg = Trollop::options do
    opt :debug, "debug mode", :type => :bool, :default => false
    opt :stack_size, "max. stack size", :type => :int, :default => 100
  end

  if !cfg[:debug]
    # gets returns nil at EOF; to_s turns that into an empty input line.
    input = STDIN.gets.to_s.strip
    phrase_table = read_phrase_table './data/example.phrases'
  else
    input = 'a b'
    phrase_table = read_phrase_table './data/debug.phrases'
  end

  tokenized_input = tokenize input
  translation_options = get_translation_options tokenized_input, phrase_table

  initial_coverage = Coverage.new(Array.new(tokenized_input.size))
  empty_hypothesis = Hypothesis.new nil, 0.0, initial_coverage, nil, nil
  stack = OrderedHypothesisStack.new
  stack.push empty_hypothesis

  STDERR.write "input size: #{tokenized_input.size}\n"

  until stack.empty?
    hyp = stack.pop
    translation_options.each_pair do |span, options|
      if hyp.applicable? span
        # One extended coverage is shared by all options for this span.
        new_coverage = hyp.coverage.clone
        new_coverage.set span
        options.each do |option|
          stack.push Hypothesis.new(option[0], hyp.score+option[1]['f2e'], new_coverage, hyp, span)
        end
      elsif hyp.coverage.size == tokenized_input.size && !hyp.final
        # Complete hypothesis: print it once, on the first span visited.
        puts format_derivation(hyp, tokenized_input.size)
        hyp.final = true
      end
    end

    stack.sort!
    # Prune in place. Slicing with stack[a..b] returns a plain Array on
    # Ruby >= 3.0, which would lose the score-based sort! defined above.
    excess = stack.size - cfg[:stack_size]
    stack.shift(excess) if excess > 0
  end
end

main