From e84265273b121eb5fbf465b80ea39d43602703fe Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Mon, 23 Mar 2015 11:01:45 +0100 Subject: fix prototype --- prototype/README.md | 3 ++ prototype/test_hg.rb | 4 --- prototype/test_parse.rb | 33 ++++++++++--------- prototype/weaver.rb | 82 ---------------------------------------------- prototype/weaver_proto.rb | 83 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+), 102 deletions(-) create mode 100644 prototype/README.md delete mode 100755 prototype/weaver.rb create mode 100755 prototype/weaver_proto.rb (limited to 'prototype') diff --git a/prototype/README.md b/prototype/README.md new file mode 100644 index 0000000..7216d49 --- /dev/null +++ b/prototype/README.md @@ -0,0 +1,3 @@ +experimental scfg machine translation decoder in ruby, currently implements cky+ +parsing and viterbi on hypergraphs + diff --git a/prototype/test_hg.rb b/prototype/test_hg.rb index 65b61f1..d8071fb 100755 --- a/prototype/test_hg.rb +++ b/prototype/test_hg.rb @@ -6,9 +6,6 @@ def main # viterbi semiring = ViterbiSemiring.new hypergraph, nodes_by_id = HG::read_hypergraph_from_json('../example/toy/toy.json', semiring, true) - #hypergraph, nodes_by_id = HG::read_hypergraph_from_json('../example/toy/toy-test.json', semiring, true) - #hypergraph, nodes_by_id = HG::read_hypergraph_from_json('../example/glue/glue.json', semiring, true) - #hypergraph, nodes_by_id = HG::read_hypergraph_from_json('../example/3/3.json', semiring, true) path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring s = HG::derive path, path.last.head, [] path.each { |e| puts "#{e.rule}" } @@ -26,6 +23,5 @@ def main } end - main diff --git a/prototype/test_parse.rb b/prototype/test_parse.rb index cae6168..f57984a 100755 --- a/prototype/test_parse.rb +++ b/prototype/test_parse.rb @@ -1,24 +1,24 @@ #!/usr/bin/env ruby +require 'zipf' require_relative 'parse' def main - STDERR.write "> reading input from TODO\n" - input = 'ich sah ein kleines haus'.split - #input = 'lebensmittel schuld an europäischer inflation'.split - #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split + fn = '../example/toy/in' + #fn = '../example/glue/in' + STDERR.write "> reading input from #{fn}\n" + input = ReadFile.new(fn).readlines_strip.first.split n = input.size STDERR.write "> reading grammar\n" - #grammar = Grammar::Grammar.new '../example/toy/grammar' - grammar = Grammar::Grammar.new '../example/toy/grammar-test' + grammar = Grammar::Grammar.new '../example/toy/grammar' + #grammar = Grammar::Grammar.new '../example/toy/grammar-test' #grammar = Grammar::Grammar.new '../example/glue/grammar' - #grammar = Grammar::Grammar.new '../example/3/grammar' STDERR.write ">> adding glue grammar\n" grammar.add_glue_rules - STDERR.write ">> adding pass-through grammar\n" + #STDERR.write ">> adding pass-through grammar\n" #grammar.add_pass_through_rules input STDERR.write "> initializing charts\n" @@ -29,15 +29,16 @@ def main STDERR.write "> parsing\n" Parse::parse input, n, active_chart, passive_chart, grammar - puts "\n---\npassive chart" - Parse::visit(1, 0, n) { |i,j| k=0; puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts " #{k} #{item.to_s}"; k+=1 }; puts } + STDERR.write "\n---\npassive chart\n" + Parse::visit(1, 0, n) { |i,j| k=0; STDERR.write "#{i},#{j}\n"; passive_chart.at(i,j).each { |item| STDERR.write " #{k} #{item.to_s}\n"; k+=1 }; STDERR.write "\n" } - weights_file = '../example/toy/weights' - #weights_file = '../example/glue/weights' - #weights_file = '../example/3/weights.init' - weights = SparseVector.from_kv(ReadFile.read(weights_file), ' ', "\n") - if !weights - weights = SparseVector.new + weights_fn = '../example/toy/weights.toy' + #weights_fn = nil + weights = nil + if weights_fn + weights = SparseVector.from_kv(ReadFile.read(weights_fn), ' ', "\n") + else + weights = SparseVector.new end puts passive_chart.to_hg.to_json weights diff --git a/prototype/weaver.rb b/prototype/weaver.rb deleted file mode 100755 index 5cda844..0000000 --- a/prototype/weaver.rb +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env ruby - -require 'trollop' -require 'xmlsimple' -require_relative 'parse' - -def read_grammar fn, add_glue, add_pass_through, input=nil - STDERR.write "> reading grammar '#{fn}'\n" - grammar = Grammar::Grammar.new fn - if add_glue - STDERR.write ">> adding glue rules\n" - grammar.add_glue_rules - end - if add_pass_through - STDERR.write ">> adding pass-through rules\n" - grammar.add_pass_through_rules input - end - return grammar -end - -def main - cfg = Trollop::options do - opt :input, "", :type => :string, :default => '-', :short => '-i' - opt :grammar, "", :type => :string, :required => true, :short => '-g' - opt :weights, "", :type => :string, :required => true, :short => '-w' - opt :add_glue, "", :type => :bool, :default => false, :short => '-l' - opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p' - end - - grammar = nil - if cfg[:grammar] - grammar = read_grammar cfg[:grammar], cfg[:add_glue], cfg[:add_pass_through] - end - - sgm_input = false - if ['sgm', 'xml'].include? cfg[:input].split('.')[-1] - sgm_input = true - end - - STDERR.write "> reading input from '#{cfg[:input]}'\n" - ReadFile.readlines_strip(cfg[:input]).each { |input| - - if sgm_input - x = XmlSimple.xml_in(input) - input = x['content'].split - else - input = input.split - end - n = input.size - - if sgm_input && x['grammar'] - grammar = read_grammar x['grammar'], cfg[:add_glue], cfg[:add_pass_through], input - elsif cfg[:add_pass_through] - grammar.add_pass_through_rules input - end - - - STDERR.write "> initializing charts\n" - passive_chart = Parse::Chart.new n - active_chart = Parse::Chart.new n - Parse::init input, n, active_chart, passive_chart, grammar - - STDERR.write "> parsing\n" - Parse::parse input, n, active_chart, passive_chart, grammar - - weights = SparseVector.from_kv(ReadFile.read(cfg[:weights]), ' ', "\n") - if !weights - weights = SparseVector.new - end - - hypergraph = passive_chart.to_hg weights - - STDERR.write "> viterbi\n" - semiring = ViterbiSemiring.new - path, score = HG::viterbi_path hypergraph, hypergraph.nodes_by_id[-1], semiring - s = HG::derive path, path.last.head, [] - STDOUT.write "#{s.map { |i| i.word }.join ' '} ||| #{Math.log score}\n" - } -end - -main - diff --git a/prototype/weaver_proto.rb b/prototype/weaver_proto.rb new file mode 100755 index 0000000..912090b --- /dev/null +++ b/prototype/weaver_proto.rb @@ -0,0 +1,83 @@ +#!/usr/bin/env ruby + +require 'trollop' +require 'xmlsimple' +require_relative 'parse' + +def read_grammar fn, add_glue, add_pass_through, input=nil + STDERR.write "> reading grammar '#{fn}'\n" + grammar = Grammar::Grammar.new fn + if add_glue + STDERR.write ">> adding glue rules\n" + grammar.add_glue_rules + end + if add_pass_through + STDERR.write ">> adding pass-through rules\n" + grammar.add_pass_through_rules input + end + return grammar +end + +def main + cfg = Trollop::options do + opt :input, "", :type => :string, :default => '-', :short => '-i' + opt :grammar, "", :type => :string, :required => true, :short => '-g' + opt :weights, "", :type => :string, :required => true, :short => '-w' + opt :add_glue, "", :type => :bool, :default => false, :short => '-l' + opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p' + end + + grammar = nil + if cfg[:grammar] + grammar = read_grammar cfg[:grammar], cfg[:add_glue], cfg[:add_pass_through] + end + + sgm_input = false + if ['sgm', 'xml'].include? cfg[:input].split('.')[-1] + sgm_input = true + end + + STDERR.write "> reading input from '#{cfg[:input]}'\n" + ReadFile.readlines_strip(cfg[:input]).each { |input| + + if sgm_input + x = XmlSimple.xml_in(input) + input = x['content'].split + else + input = input.split + end + n = input.size + + if sgm_input && x['grammar'] + grammar = read_grammar x['grammar'], cfg[:add_glue], cfg[:add_pass_through], input + elsif cfg[:add_pass_through] + grammar.add_pass_through_rules input + end + + + STDERR.write "> initializing charts\n" + passive_chart = Parse::Chart.new n + active_chart = Parse::Chart.new n + Parse::init input, n, active_chart, passive_chart, grammar + + STDERR.write "> parsing\n" + Parse::parse input, n, active_chart, passive_chart, grammar + + weights = SparseVector.from_kv(ReadFile.read(cfg[:weights]), ' ', "\n") + if !weights + weights = SparseVector.new + end + + hypergraph = passive_chart.to_hg weights + + STDERR.write "> viterbi\n" + semiring = ViterbiSemiring.new + path, score = HG::viterbi_path hypergraph, hypergraph.nodes_by_id[-1], semiring + s = HG::derive path, path.last.head, [] + STDOUT.write "#{s.map { |i| i.word }.join ' '} ||| #{Math.log score}\n" + + } +end + +main + -- cgit v1.2.3