summary | refs | log | tree | commit | diff
path: root/simple_decoder/simple_decoder.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit 26c490f404731d053a6205719b6246502c07b449 (patch)
tree 3aa721098f1251dfbf2249ecd2736434c13b1d48 /simple_decoder/simple_decoder.rb
init
Diffstat (limited to 'simple_decoder/simple_decoder.rb')
-rwxr-xr-x simple_decoder/simple_decoder.rb 171
1 files changed, 171 insertions, 0 deletions
diff --git a/simple_decoder/simple_decoder.rb b/simple_decoder/simple_decoder.rb
new file mode 100755
index 0000000..fca4eb4
--- /dev/null
+++ b/simple_decoder/simple_decoder.rb
@@ -0,0 +1,171 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+require 'trollop'
+
+
+# Array of scored items that can be reordered by their score.
+class OrderedHypothesisStack < Array
+
+  # Sorts the stack in place by ascending #score, so that Array#pop hands
+  # back the highest-scoring element. Returns the stack itself.
+  def sort!
+    sort_by!(&:score)
+  end
+end
+
+# Plain container for a rule: a left side, a right side and its features.
+class Rule
+  attr_accessor :left, :right, :features
+
+  # Stores the three parts unchanged; all of them stay readable and
+  # writable through the accessors.
+  def initialize(left, right, features)
+    @left, @right, @features = left, right, features
+  end
+end
+
+# Records which positions of the input are already covered.
+# Internally one array slot per position; a truthy slot means "covered".
+class Coverage
+
+  # a: Array with one slot per input position (nil = still free).
+  def initialize(a)
+    @_a = a
+    @_size = nil # lazily computed number of covered positions
+  end
+
+  # Marks every position of the inclusive range tuple[0]..tuple[1] as
+  # covered.
+  def set(tuple)
+    # The memoized count is stale as soon as the slots change.
+    @_size = nil
+    tuple[0].upto(tuple[1]) { |i| @_a[i] = true }
+  end
+
+  # Returns an independent Coverage with the same covered positions;
+  # changing the copy leaves the receiver untouched.
+  def clone
+    Coverage.new(@_a.map { |slot| slot ? true : nil })
+  end
+
+  # Human-readable String: the slot array followed by the covered count.
+  def to_s
+    "#{@_a.inspect} size=#{size}"
+  end
+
+  # True when no position of the inclusive range tuple[0]..tuple[1] is
+  # covered yet.
+  def free?(tuple)
+    tuple[0].upto(tuple[1]).none? { |i| @_a[i] }
+  end
+
+  # Number of covered positions (memoized until the next #set).
+  def size
+    @_size ||= @_a.count { |slot| slot }
+  end
+end
+
+# One search state: the rule applied last, the accumulated score, the
+# coverage reached so far, the hypothesis it was expanded from and the span
+# the rule was applied to. `final` starts out false.
+class Hypothesis
+  attr_accessor :rule, :score, :coverage, :previous, :span, :final
+
+  def initialize(rule, score, coverage, previous, span)
+    @rule, @score, @coverage = rule, score, coverage
+    @previous, @span = previous, span
+    @final = false
+  end
+
+  # Debug representation: an inspected array of score, rule, coverage,
+  # a "TODO" placeholder and the span.
+  def to_s
+    parts = [@score, @rule, @coverage.to_s, "TODO", @span.to_s]
+    parts.inspect
+  end
+
+  # True when the coverage reports the given span as still free.
+  def applicable?(span)
+    @coverage.free?(span)
+  end
+end
+
+# Builds a Hash mapping every span [first, last] of the input to its list of
+# translation options.
+# Single-token spans always start with a pass-through option (the token
+# itself with 'f2e' => -1.0); every span additionally receives all entries
+# the phrase table holds for the space-joined source phrase.
+def get_translation_options(tokenized_input, phrase_table)
+  options_by_span = {}
+  final_index = tokenized_input.size - 1
+  tokenized_input.each_index do |first|
+    (first..final_index).each do |last|
+      phrase = tokenized_input[first..last].join(' ')
+      candidates = first == last ? [[phrase, {'f2e'=>-1.0}]] : []
+      if phrase_table.has_key? phrase
+        phrase_table[phrase].each { |entry| candidates << entry }
+      end
+      options_by_span[[first, last]] = candidates
+    end
+  end
+  options_by_span
+end
+
+# Command-line entry point. Reads one sentence (from STDIN, or the fixed
+# string 'a b' in debug mode), collects translation options for every span
+# and expands hypotheses taken from a score-ordered stack. Every hypothesis
+# that covers the whole input is printed once as
+# "<score> <segments>|| <partial scores>".
+#
+# read_phrase_table and tokenize are not defined in this file; presumably
+# they are provided by nlp_ruby -- confirm.
+def main
+  cfg = Trollop::options do
+    opt :debug, "debug mode", :type => :bool, :default => false
+    opt :stack_size, "max. stack size", :type => :int, :default => 100
+  end
+  if cfg[:debug]
+    input = 'a b'
+    phrase_table = read_phrase_table './data/debug.phrases'
+  else
+    # gets returns nil at EOF; to_s turns that into an empty sentence
+    # instead of raising NoMethodError on nil.strip.
+    input = STDIN.gets.to_s.strip
+    phrase_table = read_phrase_table './data/example.phrases'
+  end
+  tokenized_input = tokenize input
+  translation_options = get_translation_options tokenized_input, phrase_table
+  # One nil (= uncovered) slot per input token.
+  initial_coverage = Coverage.new Array.new(tokenized_input.size)
+  empty_hypothesis = Hypothesis.new nil, 0.0, initial_coverage, nil, nil
+  stack = OrderedHypothesisStack.new
+  stack.push empty_hypothesis
+  STDERR.write "input size: #{tokenized_input.size}\n"
+  until stack.empty?
+    hyp = stack.pop
+    translation_options.each_pair { |span, options|
+      if hyp.applicable? span
+        # All expansions over this span share one coverage object; it is
+        # only ever cloned afterwards, never modified.
+        new_coverage = hyp.coverage.clone
+        new_coverage.set span
+        options.each { |option|
+          stack.push Hypothesis.new option[0], hyp.score+option[1]['f2e'], new_coverage, hyp, span
+        }
+      elsif hyp.coverage.size == tokenized_input.size && !hyp.final
+        # Complete hypothesis: walk the back-pointers and rebuild the output.
+        # NOTE(review): target tokens are written at source positions, so a
+        # target phrase longer than its source span overwrites neighbouring
+        # slots.
+        str = Array.new(tokenized_input.size) { '' }
+        scores = []
+        score = hyp.score
+        cur_hyp = hyp
+        while cur_hyp.rule
+          i = cur_hyp.span[0]
+          scores << cur_hyp.score
+          cur_hyp.rule.split.each { |tok|
+            str[i] = tok
+            i += 1
+          }
+          str[i-1] += " |"
+          cur_hyp = cur_hyp.previous
+        end
+        puts "#{score} #{str.join(' ')}|| #{scores.to_s}\n"
+        hyp.final = true # print each complete hypothesis only once
+      end
+    }
+    stack.sort!
+    # Prune in place: slicing (stack[a..b]) returns a plain Array on
+    # Ruby >= 3.0, which would lose the score-based sort! defined on
+    # OrderedHypothesisStack. shift drops the lowest-scoring entries.
+    overflow = stack.size - cfg[:stack_size]
+    stack.shift overflow if overflow > 0
+  end
+end
+
+
+# Run the decoder only when this file is executed directly, so that loading
+# it (e.g. to reuse the classes) does not start reading STDIN.
+main if __FILE__ == $PROGRAM_NAME
+