summaryrefslogtreecommitdiff
path: root/main.rb
blob: 229454a8b721247f196fcc21648f03a0a5d67991 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env ruby

require 'trollop'
require 'xmlsimple'
require_relative 'parse'


# Reads a grammar from +fn+ and optionally extends it with extra rules.
#
# fn               - file name handed to Grammar::Grammar.new
# add_glue         - when truthy, add_glue_rules is called on the grammar
# add_pass_through - when truthy, add_pass_through_rules is called with +input+
# input            - value forwarded to add_pass_through_rules; optional so
#                    that existing three-argument calls keep working
#
# Progress messages are written to STDERR.
#
# Returns the grammar object.
# Raises ArgumentError if add_pass_through is truthy but no input was given.
def read_grammar(fn, add_glue, add_pass_through, input = nil)
  # Previously this method referenced an undefined local `input`, which
  # raised NameError whenever pass-through rules were requested. The input
  # is now an explicit argument; fail early, before loading the grammar,
  # if it is missing.
  if add_pass_through && input.nil?
    raise ArgumentError, "read_grammar: add_pass_through requires an input"
  end

  STDERR.write "> reading grammar '#{fn}'\n"
  grammar = Grammar::Grammar.new fn
  if add_glue
    STDERR.write ">> adding glue grammar\n"
    grammar.add_glue_rules
  end
  if add_pass_through
    STDERR.write ">> adding pass-through grammar\n"
    grammar.add_pass_through_rules input
  end
  grammar
end

# Command-line driver.
#
# Parses the command-line options, then processes the input line by line.
# Every line is parsed with XmlSimple.xml_in; its 'content' is split on
# whitespace into the token array to parse, and an optional 'grammar'
# attribute names a grammar file that replaces the current grammar from
# that line on. For each line the charts are initialized and filled, the
# passive chart is turned into a hypergraph, and the words of the derivation
# found by HG::viterbi_path are printed to STDOUT, followed by the natural
# logarithm of the returned score and an empty line. Progress messages go
# to STDERR.
def main
  cfg = Trollop::options do
    opt :input,            "", :type => :string, :default => '-',   :short => '-i'
    opt :grammar,          "", :type => :string, :default => nil,   :short => '-g'
    opt :weights,          "", :type => :string, :default => nil,   :short => '-w'
    opt :add_glue,         "", :type => :bool,   :default => false, :short => '-h'
    opt :add_pass_through, "", :type => :bool,   :default => false, :short => '-p'
  end

  # Pass-through rules depend on the tokens of the current input, which are
  # only known inside the loop below. read_grammar is therefore always called
  # with add_pass_through = false and the rules are added here in main.
  grammar = nil
  if cfg[:grammar]
    grammar = read_grammar cfg[:grammar], cfg[:add_glue], false
  end

  # The weights do not depend on the input, so load them once. The option
  # defaults to nil; in that case fall back to an empty vector instead of
  # passing nil on to ReadFile.read.
  weights = nil
  if cfg[:weights]
    weights = SparseVector.from_kv(ReadFile.read(cfg[:weights]), ' ', "\n")
  end
  weights ||= SparseVector.new

  STDERR.write "> reading input from '#{cfg[:input]}'\n"
  ReadFile.readlines_strip(cfg[:input]).each do |line|
    x = XmlSimple.xml_in(line)
    input = x['content'].split
    n = input.size

    if x['grammar']
      grammar = read_grammar x['grammar'], cfg[:add_glue], false
    end

    # NOTE(review): if neither -g nor a 'grammar' attribute was given,
    # grammar is still nil here and is handed to Parse::init as before.
    # NOTE(review): when several inputs share one grammar object, rules added
    # for earlier inputs stay in it -- confirm that this is acceptable for
    # add_pass_through_rules.
    if cfg[:add_pass_through] && grammar
      STDERR.write ">> adding pass-through grammar\n"
      grammar.add_pass_through_rules input
    end

    STDERR.write "> initializing charts\n"
    passive_chart = Parse::Chart.new n
    active_chart = Parse::Chart.new n
    Parse::init input, n, active_chart, passive_chart, grammar

    STDERR.write "> parsing\n"
    Parse::parse input, n, active_chart, passive_chart, grammar

    hypergraph, nodes_by_id = passive_chart.to_hg weights

    STDERR.write "> viterbi\n"
    semiring = ViterbiSemiring.new
    path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
    s = HG::derive path, path.last.head, []
    puts s.map { |i| i.word }.join(' ')
    puts Math.log score
    puts
  end
end


# Script entry point: run the command-line driver.
main