1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
#!/usr/bin/env ruby
require 'nlp_ruby'
require 'trollop'
# Array subclass whose in-place sort orders its elements by their +score+.
class OrderedHypothesisStack < Array
  # Reorders the stack in place by ascending +score+, so the element with the
  # highest score ends up last (the one +pop+ removes).
  # Every element must respond to +score+.
  #
  # @return [OrderedHypothesisStack] self
  def sort!
    sort_by!(&:score)
  end
end
# Plain record bundling the three parts of a rule: +left+, +right+ and
# +features+. All three are readable and writable.
# NOTE(review): presumably left/right are the source/target sides of a phrase
# pair and features its scores -- nothing in this file instantiates Rule, so
# confirm against callers.
class Rule
  attr_accessor :left, :right, :features

  # @param left     stored as-is in +left+
  # @param right    stored as-is in +right+
  # @param features stored as-is in +features+
  def initialize(left, right, features)
    @left, @right, @features = left, right, features
  end
end
# Records which positions of an input have already been covered.
# A slot holding a truthy value means "covered"; nil/false means "free".
class Coverage
  # @param a [Array] one slot per position. The array is stored by
  #   reference (not copied), so later calls to #set modify it.
  def initialize(a)
    @_a = a
    @_size = nil # memoized result of #size; nil means "not computed yet"
  end

  # Marks every position of the inclusive span as covered.
  #
  # @param tuple [Array] two-element span, [first, last]
  # @return [Integer] tuple[0] (the value of Integer#upto)
  def set(tuple)
    # The covered count is about to change, so drop the memoized value;
    # otherwise #size would keep reporting the count from before this call.
    @_size = nil
    tuple[0].upto(tuple[1]) { |i| @_a[i] = true }
  end

  # Builds an independent Coverage with the same covered positions.
  # Slots are normalized to true (covered) or nil (free).
  #
  # @return [Coverage]
  def clone
    Coverage.new(@_a.map { |covered| covered ? true : nil })
  end

  # NOTE: returns a two-element Array (inspected slots, covered count),
  # not a String. Kept as-is because Hypothesis#to_s embeds this value.
  #
  # @return [Array]
  def to_s
    [@_a.to_s, size]
  end

  # @param tuple [Array] two-element span, [first, last] (inclusive)
  # @return [Boolean] true when no position inside the span is covered
  def free?(tuple)
    (tuple[0]..tuple[1]).none? { |i| @_a[i] }
  end

  # Number of covered positions. The count is memoized until the next #set.
  #
  # @return [Integer]
  def size
    @_size ||= @_a.count { |covered| covered }
  end
end
# One node in a chain of partial results. Each hypothesis stores the rule it
# applied, its score, the coverage reached so far, the hypothesis it extends
# (+previous+) and the span it covered. +final+ starts out false.
class Hypothesis
  attr_accessor :rule, :score, :coverage, :previous, :span, :final

  # @param rule     the applied rule (nil for a hypothesis with no rule)
  # @param score    score of this hypothesis
  # @param coverage object answering +free?(span)+ and +to_s+
  # @param previous the hypothesis this one extends, or nil
  # @param span     the span covered by +rule+, or nil
  def initialize(rule, score, coverage, previous, span)
    @rule     = rule
    @score    = score
    @coverage = coverage
    @previous = previous
    @span     = span
    @final    = false
  end

  # Debug rendering: the inspected list of score, rule, coverage, a
  # placeholder for the not-yet-rendered field, and the span.
  #
  # @return [String]
  def to_s
    fields = [@score, @rule, @coverage.to_s, "TODO", span.to_s]
    fields.to_s
  end

  # @param span the span to test against this hypothesis' coverage
  # @return whatever +coverage.free?(span)+ returns
  def applicable?(span)
    @coverage.free?(span)
  end
end
# Collects, for every contiguous span of the input, the options that can
# translate it.
#
# Each span [i, j] (inclusive token indices, i <= j) becomes a key. A
# single-token span always starts with a pass-through option that copies the
# token itself with feature 'f2e' => -1.0. Entries found in +phrase_table+
# under the space-joined span text are appended after it.
#
# @param tokenized_input [Array<String>] input tokens
# @param phrase_table [Hash] maps a source string to a list of options
# @return [Hash] span => list of options (possibly empty for multi-token spans)
def get_translation_options(tokenized_input, phrase_table)
  translation_options = {}
  last = tokenized_input.size - 1
  0.upto(last) do |start|
    start.upto(last) do |stop|
      source = tokenized_input[start..stop].join(' ')
      # Only single tokens get the identity fallback.
      options = start == stop ? [[source, { 'f2e' => -1.0 }]] : []
      if phrase_table.has_key?(source)
        phrase_table[source].each { |right_side| options << right_side }
      end
      translation_options[[start, stop]] = options
    end
  end
  translation_options
end
# Entry point: reads one input line (or uses the built-in debug input),
# expands hypotheses over all translation options with a size-limited stack,
# and prints every hypothesis that covers the whole input.
#
# Command-line options (parsed by Trollop):
#   --debug       use the fixed input 'a b' and ./data/debug.phrases
#   --stack-size  maximum number of hypotheses kept after each expansion
def main
  cfg = Trollop::options do
    opt :debug, "debug mode", :type => :bool, :default => false
    opt :stack_size, "max. stack size", :type => :int, :default => 100
  end
  if cfg[:debug]
    input = 'a b'
    phrase_table = read_phrase_table './data/debug.phrases'
  else
    # gets returns nil at EOF; treat that like an empty line instead of
    # crashing on nil.strip.
    input = (STDIN.gets || '').strip
    phrase_table = read_phrase_table './data/example.phrases'
  end
  tokenized_input = tokenize input
  input_size = tokenized_input.size
  translation_options = get_translation_options tokenized_input, phrase_table

  # Start from a hypothesis that applies no rule and covers nothing.
  initial_coverage = Coverage.new Array.new(input_size)
  stack = OrderedHypothesisStack.new
  stack.push Hypothesis.new(nil, 0.0, initial_coverage, nil, nil)
  STDERR.write "input size: #{input_size}\n"

  until stack.empty?
    hyp = stack.pop
    translation_options.each_pair do |span, options|
      if hyp.applicable? span
        # All options for this span lead to the same coverage, so one copy
        # is shared by the new hypotheses.
        new_coverage = hyp.coverage.clone
        new_coverage.set span
        options.each do |option|
          stack.push Hypothesis.new(option[0], hyp.score + option[1]['f2e'], new_coverage, hyp, span)
        end
      elsif hyp.coverage.size == input_size && !hyp.final
        # Complete hypothesis: print it once; +final+ guards against
        # re-printing for the remaining (equally inapplicable) spans.
        puts render_final_hypothesis(hyp, input_size)
        hyp.final = true
      end
    end
    stack.sort!
    # Prune in place, dropping the lowest-scored entries. Slicing with
    # stack[...] would return a plain Array on Ruby >= 3.0, and the next
    # sort! would then compare Hypothesis objects with <=> and raise.
    excess = stack.size - cfg[:stack_size]
    stack.shift(excess) if excess > 0
  end
end

# Builds the output line for a complete hypothesis by walking back through
# its +previous+ chain until the hypothesis without a rule is reached.
#
# Tokens of each rule are written into slots starting at the rule's span
# start, and " |" is appended after the last token of every rule. The line
# has the form "<score> <tokens>|| <list of scores along the chain>\n".
#
# @param hyp the complete hypothesis
# @param input_size [Integer] number of input tokens (initial slot count)
# @return [String]
def render_final_hypothesis(hyp, input_size)
  slots = Array.new(input_size) { '' }
  scores = []
  cur_hyp = hyp
  while cur_hyp.rule
    position = cur_hyp.span[0]
    scores << cur_hyp.score
    cur_hyp.rule.split.each do |tok|
      slots[position] = tok
      position += 1
    end
    slots[position - 1] += " |"
    cur_hyp = cur_hyp.previous
  end
  "#{hyp.score} #{slots.join(' ')}|| #{scores}\n"
end
# Script entry point: main is invoked as soon as this file is loaded.
main
|