1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
#!/usr/bin/env ruby
require 'trollop'
require 'xmlsimple'
require_relative 'parse'
# Loads a grammar from +fn+ and optionally augments it with extra rules.
#
# fn               - argument handed to Grammar::Grammar.new (the grammar source).
# add_glue         - when truthy, calls +add_glue_rules+ on the new grammar.
# add_pass_through - when truthy, calls +add_pass_through_rules+ with +input+.
# input            - value forwarded to +add_pass_through_rules+ (nil by default).
#
# Progress messages are written to STDERR. Returns the grammar object.
def read_grammar(fn, add_glue, add_pass_through, input = nil)
  STDERR.write "> reading grammar '#{fn}'\n"
  Grammar::Grammar.new(fn).tap do |loaded|
    if add_glue
      STDERR.write ">> adding glue rules\n"
      loaded.add_glue_rules
    end
    if add_pass_through
      STDERR.write ">> adding pass-through rules\n"
      loaded.add_pass_through_rules(input)
    end
  end
end
# Command-line entry point.
#
# Parses the options, loads the grammar and the weights, then for every line
# read from the input: tokenizes it, builds and fills the active/passive
# charts, converts the passive chart to a hypergraph, and writes
# "<derived words> ||| <log of the Viterbi score>" to STDOUT.
# Progress messages go to STDERR.
#
# Input whose name ends in ".sgm" or ".xml" is parsed line by line with
# XmlSimple; the sentence is taken from the 'content' entry and an optional
# 'grammar' entry replaces the current grammar from that line onwards.
def main
  cfg = Trollop::options do
    opt :input, "", :type => :string, :default => '-', :short => '-i'
    opt :grammar, "", :type => :string, :required => true, :short => '-g'
    opt :weights, "", :type => :string, :required => true, :short => '-w'
    opt :add_glue, "", :type => :bool, :default => false, :short => '-l'
    opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p'
  end

  # NOTE(review): with -p this call reaches add_pass_through_rules with a nil
  # input (read_grammar's default), and pass-through rules are added again per
  # sentence below -- confirm add_pass_through_rules tolerates nil.
  grammar = nil
  grammar = read_grammar cfg[:grammar], cfg[:add_glue], cfg[:add_pass_through] if cfg[:grammar]

  # Match on the real ".sgm"/".xml" suffix so that an extension-less file that
  # happens to be named "sgm" or "xml" is not mistaken for markup.
  sgm_input = cfg[:input].end_with?('.sgm', '.xml')

  # The weights do not depend on the sentence: load them once instead of
  # re-reading the file for every input line. Fall back to an empty vector
  # when from_kv yields nothing.
  weights = SparseVector.from_kv(ReadFile.read(cfg[:weights]), ' ', "\n") || SparseVector.new

  STDERR.write "> reading input from '#{cfg[:input]}'\n"
  ReadFile.readlines_strip(cfg[:input]).each do |line|
    x = nil
    if sgm_input
      x = XmlSimple.xml_in(line)
      input = x['content'].split
    else
      input = line.split
    end
    n = input.size

    if sgm_input && x['grammar']
      # A per-sentence grammar replaces the current one (and stays in effect
      # for following lines that do not name their own grammar).
      grammar = read_grammar x['grammar'], cfg[:add_glue], cfg[:add_pass_through], input
    elsif cfg[:add_pass_through]
      grammar.add_pass_through_rules input
    end

    STDERR.write "> initializing charts\n"
    passive_chart = Parse::Chart.new n
    active_chart = Parse::Chart.new n
    Parse::init input, n, active_chart, passive_chart, grammar

    STDERR.write "> parsing\n"
    Parse::parse input, n, active_chart, passive_chart, grammar

    hypergraph = passive_chart.to_hg weights

    STDERR.write "> viterbi\n"
    semiring = ViterbiSemiring.new
    path, score = HG::viterbi_path hypergraph, hypergraph.nodes_by_id[-1], semiring
    s = HG::derive path, path.last.head, []
    STDOUT.write "#{s.map { |i| i.word }.join ' '} ||| #{Math.log score}\n"
  end
end
# Run the command-line driver when this script is executed.
main
|