diff options
| author | Patrick Simianer <p@simianer.de> | 2014-06-14 16:46:27 +0200 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2014-06-14 16:46:27 +0200 | 
| commit | 26c490f404731d053a6205719b6246502c07b449 (patch) | |
| tree | 3aa721098f1251dfbf2249ecd2736434c13b1d48 /simple_decoder/simple_decoder.rb | |
init
Diffstat (limited to 'simple_decoder/simple_decoder.rb')
| -rwxr-xr-x | simple_decoder/simple_decoder.rb | 171 | 
1 files changed, 171 insertions, 0 deletions
#!/usr/bin/env ruby
# simple_decoder/simple_decoder.rb
#
# Reads one input line, collects translation options for every contiguous
# token span, and expands hypotheses from a score-ordered, size-limited stack.
# Every hypothesis that covers the whole input is printed to STDOUT.

require 'nlp_ruby'
require 'trollop'

# Array of hypotheses whose `sort!` orders by ascending score, so the best
# hypothesis is always the last element (the one `pop` returns).
class OrderedHypothesisStack < Array
  # Sorts in place by each element's `score`, lowest first.
  def sort!
    sort_by!(&:score)
  end
end

# Plain container for a rule's two sides and its feature values.
class Rule
  attr_accessor :left, :right, :features

  def initialize(left, right, features)
    @left = left
    @right = right
    @features = features
  end
end

# Tracks which input positions have already been covered.
class Coverage
  # a: array with one slot per input position; a truthy slot means covered.
  def initialize(a)
    @_a = a
    @_size = nil
  end

  # Marks every position in the inclusive span tuple = [first, last] as covered.
  def set(tuple)
    tuple[0].upto(tuple[1]) { |i| @_a[i] = true }
    # The memoized count is no longer valid once the array changed.
    @_size = nil
  end

  # Returns an independent Coverage with the same covered positions
  # (slots are normalized to true / nil, the size cache starts empty).
  def clone
    Coverage.new(@_a.map { |covered| covered ? true : nil })
  end

  # String form: the slot array followed by the number of covered positions.
  def to_s
    [@_a.to_s, size].to_s
  end

  # True when no position in the inclusive span tuple = [first, last] is covered.
  def free?(tuple)
    tuple[0].upto(tuple[1]).none? { |i| @_a[i] }
  end

  # Number of covered positions; memoized until the next call to `set`.
  def size
    @_size ||= @_a.count { |covered| covered }
  end
end

# One node of the search: the option applied last, the accumulated score,
# the resulting coverage, and a back-pointer to the hypothesis it extends.
class Hypothesis
  attr_accessor :rule, :score, :coverage, :previous, :span, :final

  def initialize(rule, score, coverage, previous, span)
    @rule = rule
    @score = score
    @coverage = coverage
    @previous = previous
    @span = span
    @final = false
  end

  def to_s
    [@score, @rule, @coverage.to_s, "TODO", span.to_s].to_s
  end

  # True when every position of the inclusive span is still uncovered.
  def applicable?(span)
    @coverage.free? span
  end
end

# Builds a hash mapping every contiguous span [i, j] of the input to its list
# of translation options.
#
# tokenized_input: array of tokens.
# phrase_table:    hash-like object; each value is iterated and its elements
#                  are appended as options for the matching span.
#
# Single-token spans always start with a pass-through option
# [token, {'f2e'=>-1.0}]; longer spans start empty.
def get_translation_options(tokenized_input, phrase_table)
  translation_options = {}
  last = tokenized_input.size - 1
  0.upto(last) do |i|
    i.upto(last) do |j|
      span = [i, j]
      str = tokenized_input[i..j].join ' '
      translation_options[span] = i == j ? [[str, {'f2e'=>-1.0}]] : []
      next unless phrase_table.has_key? str

      phrase_table[str].each { |right_side| translation_options[span] << right_side }
    end
  end
  translation_options
end

# Walks the back-pointers of a complete hypothesis, places each applied
# option's tokens at the start position of its span, and prints one line:
# the final score, the assembled string, and the scores seen along the way.
def print_derivation(hyp, input_size)
  str = Array.new(input_size) { '' }
  scores = []
  score = hyp.score
  cur_hyp = hyp
  # The initial (empty) hypothesis is the only one without a rule.
  while cur_hyp.rule
    tokens = cur_hyp.rule.split
    i = cur_hyp.span[0]
    scores << cur_hyp.score
    tokens.each do |tok|
      str[i] = tok
      i += 1
    end
    # Mark the end of this option's output.
    str[i - 1] += " |"
    cur_hyp = cur_hyp.previous
  end
  puts "#{score} #{str.join(' ')}|| #{scores}\n"
end

# Entry point: parses options, loads the phrase table, and runs the search.
# `tokenize` and `read_phrase_table` are not defined in this file; presumably
# they are provided by nlp_ruby.
def main
  cfg = Trollop::options do
    opt :debug, "debug mode", :type => :bool, :default => false
    opt :stack_size, "max. stack size", :type => :int, :default => 100
  end
  if cfg[:debug]
    input = 'a b'
    phrase_table = read_phrase_table './data/debug.phrases'
  else
    # `gets` returns nil at EOF; treat that as an empty input line.
    input = (STDIN.gets || '').strip
    phrase_table = read_phrase_table './data/example.phrases'
  end
  tokenized_input = tokenize input
  translation_options = get_translation_options tokenized_input, phrase_table
  initial_coverage = Coverage.new Array.new(tokenized_input.size)
  empty_hypothesis = Hypothesis.new nil, 0.0, initial_coverage, nil, nil
  stack = OrderedHypothesisStack.new
  stack.push empty_hypothesis
  STDERR.write "input size: #{tokenized_input.size}\n"
  until stack.empty?
    hyp = stack.pop
    translation_options.each_pair do |span, options|
      if hyp.applicable? span
        new_coverage = hyp.coverage.clone
        new_coverage.set span
        options.each do |opt|
          stack.push Hypothesis.new(opt[0], hyp.score + opt[1]['f2e'], new_coverage, hyp, span)
        end
      elsif hyp.coverage.size == tokenized_input.size && !hyp.final
        print_derivation hyp, tokenized_input.size
        # Print each complete hypothesis only once.
        hyp.final = true
      end
    end
    stack.sort!
    # Prune in place: after the ascending sort the worst hypotheses are at the
    # front. Slicing (`stack[a..b]`) would return a plain Array on Ruby >= 3.0
    # and lose the score-based `sort!` defined on OrderedHypothesisStack.
    overflow = stack.size - cfg[:stack_size]
    stack.shift(overflow) if overflow > 0
  end
end


main
