From 8949948243cb709e65ec172b3014c9e6a3181ed3 Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Sat, 20 Sep 2014 16:24:39 +0100 Subject: prototype: fixed grammar handling --- prototype/grammar.rb | 1 + prototype/test_hg.rb | 1 - prototype/test_parse.rb | 1 - prototype/weaver.rb | 30 +++++++++++++++++++++--------- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/prototype/grammar.rb b/prototype/grammar.rb index 42ffbc0..05224f4 100644 --- a/prototype/grammar.rb +++ b/prototype/grammar.rb @@ -126,6 +126,7 @@ class Grammar end def add_pass_through a + return if !a a.each { |word| @rules << Rule.new(NT.new('X'), [T.new(word)], [T.new(word)]) @flat << @rules.last diff --git a/prototype/test_hg.rb b/prototype/test_hg.rb index 4be72bd..65b61f1 100755 --- a/prototype/test_hg.rb +++ b/prototype/test_hg.rb @@ -2,7 +2,6 @@ require_relative 'hypergraph' - def main # viterbi semiring = ViterbiSemiring.new diff --git a/prototype/test_parse.rb b/prototype/test_parse.rb index 918101b..0d8e625 100755 --- a/prototype/test_parse.rb +++ b/prototype/test_parse.rb @@ -2,7 +2,6 @@ require_relative 'parse' - def main STDERR.write "> reading input from TODO\n" input = 'ich sah ein kleines haus'.split diff --git a/prototype/weaver.rb b/prototype/weaver.rb index 966d4c8..5afdb93 100755 --- a/prototype/weaver.rb +++ b/prototype/weaver.rb @@ -20,11 +20,11 @@ end def main cfg = Trollop::options do - opt :input, "", :type => :string, :default => '-', :short => '-i' - opt :grammar, "", :type => :string, :default => nil, :short => '-g' - opt :weights, "", :type => :string, :default => nil, :short => '-w' - opt :add_glue, "", :type => :bool, :default => false, :short => '-l' - opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p' + opt :input, "", :type => :string, :default => '-', :short => '-i' + opt :grammar, "", :type => :string, :required => true, :short => '-g' + opt :weights, "", :type => :string, :required => true, :short => '-w' + opt :add_glue, "", :type => :bool, :default => false, :short => '-l' + opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p' end grammar = nil @@ -32,17 +32,29 @@ def main grammar = read_grammar cfg[:grammar], cfg[:add_glue], cfg[:add_pass_through] end + sgm_input = false + if ['sgm', 'xml'].include? cfg[:input].split('.')[-1] + sgm_input = true + end + STDERR.write "> reading input from '#{cfg[:input]}'\n" ReadFile.readlines_strip(cfg[:input]).each { |input| - x = XmlSimple.xml_in(input) - input = x['content'].split + if sgm_input + x = XmlSimple.xml_in(input) + input = x['content'].split + else + input = input.split + end n = input.size - if x['grammar'] + if sgm_input && x['grammar'] grammar = read_grammar x['grammar'], cfg[:add_glue], cfg[:add_pass_through], input + elsif cfg[:add_pass_through] + grammar.add_pass_through input end + STDERR.write "> initializing charts\n" passive_chart = Parse::Chart.new n active_chart = Parse::Chart.new n @@ -62,7 +74,7 @@ def main semiring = ViterbiSemiring.new path, score = HG::viterbi_path hypergraph, hypergraph.nodes_by_id[-1], semiring s = HG::derive path, path.last.head, [] - STDERR.write " #{s.map { |i| i.word }.join ' '} ||| #{Math.log score}\n" + STDOUT.write "#{s.map { |i| i.word }.join ' '} ||| #{Math.log score}\n" } end -- cgit v1.2.3