1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
require 'timeout'
# Convenience extensions to the core Array class.
class Array
  # Index of the first occurrence of the largest element.
  # Returns nil for an empty array.
  def max_index
    index(max)
  end

  # True when every element of this array is also contained in +other+
  # (anything responding to +include?+). An empty array is a subset of
  # everything.
  def is_subset_of? other
    all? { |i| other.include? i }
  end

  # Folds all elements with +; returns nil for an empty array.
  def sum
    inject(:+)
  end

  # Arithmetic mean as a Float. An empty array yields NaN (0.0 / 0).
  def mean
    sum.to_f / size
  end
end
# Convenience extension to the core String class.
class String
  # Returns the first lowercase character found in the string (truthy),
  # or nil when the string contains no lowercase character.
  def downcase?
    slice(/[[:lower:]]/)
  end
end
# Array-backed queue that is fully re-sorted after every insertion.
#
# Every element must respond to +score+ with a numeric value. The backing
# array is kept in descending score order, and +pop+ removes the last entry,
# so +pop+ returns the element with the lowest score.
class PriorityQueue
  # Builds the queue from a copy of +a+; the caller's array is not modified.
  def initialize(a = Array.new)
    @queue = Array.new(a)
    sort!
  end

  # Re-orders the backing array by descending score and returns it.
  def sort!
    @queue.sort_by! { |entry| -entry.score }
  end

  # Removes and returns the last entry (lowest score), or nil when empty.
  def pop
    @queue.pop
  end

  # Appends +i+ and re-sorts the whole queue.
  def push(i)
    @queue.push(i)
    sort!
  end

  # True when the queue holds no elements.
  def empty?
    @queue.empty?
  end
end
# Runs +cmd+ as a child process and returns everything it wrote to stdout.
#
# cmd         - command line handed to Process.spawn.
# t           - seconds to wait for the child to exit (default 4).
# ignore_fail - when true, a child that exceeds the timeout is NOT killed.
# debug       - when true, the command line is echoed to STDERR first.
#
# Returns the child's stdout as a String. A timeout is not reported to the
# caller; whatever output was produced is returned.
def spawn_with_timeout(cmd, t = 4, ignore_fail = false, debug = false)
  STDERR.write cmd + "\n" if debug
  pipe_in, pipe_out = IO.pipe
  begin
    pid = Process.spawn(cmd, out: pipe_out)
  ensure
    # The parent must drop its copy of the write end, otherwise the reader
    # below can never see EOF.
    pipe_out.close
  end
  # Drain stdout while the child runs; if we only read after waiting, a child
  # producing more than the pipe buffer would block until the timeout fires.
  reader = Thread.new { pipe_in.read }
  begin
    Timeout.timeout(t) { Process.wait pid }
  rescue Timeout::Error
    unless ignore_fail
      begin
        Process.kill('TERM', pid)
      rescue Errno::ESRCH
        # The child exited between the timeout firing and the kill.
      end
    end
    # Reap the child in the background so it does not linger as a zombie.
    Process.detach(pid)
  end
  reader.value
ensure
  pipe_in.close if pipe_in && !pipe_in.closed?
end
# Loads a phrase table from the file +fn+.
#
# Each line is split by +splitpipe+ into three fields (source phrase, target
# phrase, feature string); the feature string is converted with
# SparseVector.from_kv. The exact on-disk format is whatever +splitpipe+
# accepts -- it is defined outside this block.
#
# Returns a Hash mapping each source phrase to an Array of
# [target phrase, feature vector] pairs, in file order.
def read_phrase_table(fn)
  rules_by_source = {}
  reader = ReadFile.new(fn)
  while (line = reader.gets)
    source, target, raw_features = splitpipe(line)
    entry = [target, SparseVector.from_kv(raw_features)]
    # First rule for this source phrase starts a fresh list.
    rules_by_source[source] = [] unless rules_by_source.key?(source)
    rules_by_source[source] << entry
  end
  reader.close
  rules_by_source
end
# Runs the cdec decoder on a single input and returns its k-best list.
#
# cdec_bin - path of (or command for) the cdec binary.
# input    - text sent to the decoder on stdin.
# ini      - value for cdec's -c option.
# weights  - value for cdec's -w option.
# k        - value for cdec's -k option.
# unique   - when true (default), "-r" is added to the command line.
#
# NOTE: cdec_bin, ini, weights and k are interpolated into a shell command
# unescaped, so they must come from a trusted source.
#
# Returns an Array of Translation objects, one per output line, each built
# with Translation#from_s(line, false, zero_based_rank).
def cdec_kbest(cdec_bin, input, ini, weights, k, unique = true)
  require 'open3'
  cmd = "#{cdec_bin} -c #{ini} -w #{weights} -k #{k}"
  cmd += ' -r' if unique
  # Feed the input through stdin rather than through `echo "..."`, so quotes,
  # `$`, backticks and backslashes in the text reach the decoder verbatim
  # instead of being interpreted by the shell.
  out, _status = Open3.capture2("#{cmd} 2>/dev/null", stdin_data: "#{input}\n")
  out.split("\n").each_with_index.map do |line, j|
    t = Translation.new
    t.from_s(line, false, j)
    t
  end
end
# Parses a simple "key = value" configuration file.
#
# Blank lines and lines starting with '#' are skipped; anything after a '#'
# on a line is treated as a trailing comment. Keys and values are stripped
# of surrounding whitespace. Later occurrences of a key overwrite earlier
# ones.
#
# fn - path handed to ReadFile.new.
#
# Returns a Hash of String keys to String values.
# Raises ArgumentError when a non-comment line contains no '='.
def read_config(fn)
  f = ReadFile.new(fn)
  cfg = {}
  begin
    while (line = f.gets)
      line.strip!
      next if line.empty? || line.start_with?('#')
      content = line.split('#', 2).first
      k, v = content.split(/\s*=\s*/, 2)
      # Without an '=' there is no value; fail with a readable message
      # instead of a NoMethodError on nil.
      raise ArgumentError, "malformed config line in #{fn} (expected 'key = value'): #{line}" if v.nil?
      cfg[k.strip] = v.strip
    end
  ensure
    # Always release the file handle, even when a line is malformed.
    f.close
  end
  cfg
end
|