From 33fda7f79c02b8ef152b88a11f3810c9a25a7381 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Wed, 18 Jun 2014 13:26:05 +0200
Subject: integration
---
example/3/cdec.ini | 2 +-
example/3/in.sgm | 1 +
example/glue/in | 1 +
example/glue/in.sgm | 1 +
example/toy/in.sgm | 1 +
main.rb | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++
parse.rb | 36 +++++++++++++++++++++++++
test/hg.rb | 29 +++++++++++++++++++++
test/parse.rb | 47 +++++++++++++++++++++++++++++++++
test_hg.rb | 24 -----------------
test_parse.rb | 47 ---------------------------------
11 files changed, 192 insertions(+), 72 deletions(-)
create mode 100644 example/3/in.sgm
create mode 100644 example/glue/in
create mode 100644 example/glue/in.sgm
create mode 100644 example/toy/in.sgm
create mode 100755 main.rb
create mode 100755 test/hg.rb
create mode 100755 test/parse.rb
delete mode 100755 test_hg.rb
delete mode 100755 test_parse.rb
diff --git a/example/3/cdec.ini b/example/3/cdec.ini
index ee65b4e..4491e78 100644
--- a/example/3/cdec.ini
+++ b/example/3/cdec.ini
@@ -1,5 +1,5 @@
formalism=scfg
intersection_strategy=full
-grammar=grammars/grammar.3.gz
+grammar=grammar.3.gz
#add_pass_through_rules=true
diff --git a/example/3/in.sgm b/example/3/in.sgm
new file mode 100644
index 0000000..f6fde68
--- /dev/null
+++ b/example/3/in.sgm
@@ -0,0 +1 @@
+offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .
diff --git a/example/glue/in b/example/glue/in
new file mode 100644
index 0000000..2d49127
--- /dev/null
+++ b/example/glue/in
@@ -0,0 +1 @@
+lebensmittel schuld an europäischer inflation
diff --git a/example/glue/in.sgm b/example/glue/in.sgm
new file mode 100644
index 0000000..2f1a89b
--- /dev/null
+++ b/example/glue/in.sgm
@@ -0,0 +1 @@
+lebensmittel schuld an europäischer inflation
diff --git a/example/toy/in.sgm b/example/toy/in.sgm
new file mode 100644
index 0000000..561d346
--- /dev/null
+++ b/example/toy/in.sgm
@@ -0,0 +1 @@
+ich sah ein kleines haus
diff --git a/main.rb b/main.rb
new file mode 100755
index 0000000..229454a
--- /dev/null
+++ b/main.rb
@@ -0,0 +1,75 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+require 'xmlsimple'
+require_relative 'parse'
+
+
+# Loads the grammar stored in `fn`. Glue rules are added if `add_glue` is
+# set; pass-through rules are added if `add_pass_through` is set and the
+# tokenized `input` they are added for is given. Returns the grammar.
+def read_grammar fn, add_glue, add_pass_through, input=nil
+  STDERR.write "> reading grammar '#{fn}'\n"
+  grammar = Grammar::Grammar.new fn
+  if add_glue
+    STDERR.write ">> adding glue grammar\n"
+    grammar.add_glue_rules
+  end
+  if add_pass_through && input
+    STDERR.write ">> adding pass-through grammar\n"
+    grammar.add_pass_through_rules input
+  end
+  grammar
+end
+
+# Parses every input line (one XML element per line) and prints the words
+# of its Viterbi derivation, the log of its score and an empty line.
+def main
+  cfg = Trollop::options do
+    opt :input, "", :type => :string, :default => '-', :short => '-i'
+    opt :grammar, "", :type => :string, :default => nil, :short => '-g'
+    opt :weights, "", :type => :string, :default => nil, :short => '-w'
+    opt :add_glue, "", :type => :bool, :default => false, :short => '-h'
+    opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p'
+  end
+
+  # Pass-through rules need the input tokens: they are added per sentence.
+  grammar = cfg[:grammar] ? read_grammar(cfg[:grammar], cfg[:add_glue], false) : nil
+  # Weights are the same for all sentences: read once; no file => empty vector.
+  weights = SparseVector.from_kv(ReadFile.read(cfg[:weights]), ' ', "\n") if cfg[:weights]
+  weights ||= SparseVector.new
+
+  STDERR.write "> reading input from '#{cfg[:input]}'\n"
+  ReadFile.readlines_strip(cfg[:input]).each { |line|
+    x = XmlSimple.xml_in(line)
+    input = x['content'].split
+    n = input.size
+    if x['grammar'] # a per-sentence grammar replaces the current one
+      grammar = read_grammar x['grammar'], cfg[:add_glue], cfg[:add_pass_through], input
+    elsif grammar && cfg[:add_pass_through]
+      # NOTE(review): these rules pile up in the grammar shared by sentences.
+      STDERR.write ">> adding pass-through grammar\n"
+      grammar.add_pass_through_rules input
+    end
+    abort "no grammar given (--grammar or a 'grammar' attribute)" if !grammar
+
+    STDERR.write "> initializing charts\n"
+    passive_chart = Parse::Chart.new n
+    active_chart = Parse::Chart.new n
+    Parse::init input, n, active_chart, passive_chart, grammar
+    STDERR.write "> parsing\n"
+    Parse::parse input, n, active_chart, passive_chart, grammar
+    hypergraph, nodes_by_id = passive_chart.to_hg weights
+
+    STDERR.write "> viterbi\n"
+    semiring = ViterbiSemiring.new
+    path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
+    s = HG::derive path, path.last.head, []
+    puts "#{s.map { |i| i.word }.join ' '}"
+    puts Math.log score
+    puts
+  }
+end
+
+main
+
diff --git a/parse.rb b/parse.rb
index b39b456..f1ad2c2 100644
--- a/parse.rb
+++ b/parse.rb
@@ -1,5 +1,6 @@
require 'zipf'
require_relative 'grammar'
+require_relative 'hg'
module Parse
@@ -82,6 +83,41 @@ class Chart
return json_s
end
+
+  # Builds a hypergraph from this chart: one node per distinct pair of lhs
+  # symbol and span, one edge per item carrying exp(weights.dot(item.f)).
+  # Returns the hypergraph and a hash from node id to node (root: id -1).
+  def to_hg weights
+    key = lambda { |symbol,left,right| "#{symbol},#{left},#{right}" }
+    root = HG::Node.new(-1, "root", [-1,-1])
+    nodes, edges, seen = [root], [], {}
+    nodes_by_id = { -1 => root }
+    Parse::visit(1, 0, @n) { |i,j|
+      self.at(i,j).each { |item|
+        k = key.call item.lhs.symbol, i, j
+        next if seen[k]
+        seen[k] = seen.size # node ids count up from 0 in visiting order
+        nodes << HG::Node.new(seen[k], item.lhs.symbol, [i,j])
+        nodes_by_id[seen[k]] = nodes.last
+      }
+    }
+
+    Parse::visit(1, 0, @n) { |i,j|
+      self.at(i,j).each { |item|
+        tails = [root] # items without tail spans hang off the root node
+        if !item.tail_spans.empty?
+          nts = item.rhs.each_with_index.select { |sym,_| sym.class==Grammar::NT }
+          tails = nts.map { |sym,q| nodes_by_id[seen[key.call(sym.symbol, item.tail_spans[q].left, item.tail_spans[q].right)]] }
+        end
+        rule = Grammar::Rule.new(item.lhs, item.rhs, item.target, item.map, item.f)
+        edge = HG::Hyperedge.new(nodes_by_id[seen[key.call(item.lhs.symbol, i, j)]], tails, Math.exp(weights.dot(item.f)), item.f, rule)
+        edges << edge
+        edge.head.incoming << edge
+        edge.tails.each { |t| t.outgoing << edge }
+      }
+    }
+    return HG::Hypergraph.new(nodes, edges), nodes_by_id
+  end
end
Span = Struct.new(:left, :right)
diff --git a/test/hg.rb b/test/hg.rb
new file mode 100755
index 0000000..16e6b6f
--- /dev/null
+++ b/test/hg.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+
+require_relative '../hg'
+
+# Prints the words and the log score of the Viterbi path of an example
+# hypergraph, then all of its paths; examples are found relative to this file.
+def main
+  examples = File.expand_path('../example', File.dirname(__FILE__))
+  # viterbi
+  semiring = ViterbiSemiring.new
+  hypergraph, nodes_by_id = HG::read_hypergraph_from_json(File.join(examples, 'toy/toy.json'), semiring, true)
+  #hypergraph, nodes_by_id = HG::read_hypergraph_from_json(File.join(examples, 'glue/glue.json'), semiring, true)
+  #hypergraph, nodes_by_id = HG::read_hypergraph_from_json(File.join(examples, '3/3.json'), semiring, true)
+  path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
+  s = HG::derive path, path.last.head, []
+  puts "#{s.map { |i| i.word }.join ' '}"
+  puts Math.log score
+  puts
+  # all paths
+  hypergraph.reset
+  HG::all_paths(hypergraph, nodes_by_id[-1]).each_with_index { |p,i|
+    s = HG::derive p, p.last.head, []
+    puts "#{i+1}. #{s.map { |x| x.word }.join ' '}"
+  }
+end
+
+
+main
+
diff --git a/test/parse.rb b/test/parse.rb
new file mode 100755
index 0000000..e139ea4
--- /dev/null
+++ b/test/parse.rb
@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+
+require_relative '../parse'
+
+
+# Parses an example sentence with an example grammar, prints the passive
+# chart and then the chart's JSON representation for the example weights.
+def main
+  # locate the examples relative to this file, not the working directory
+  examples = File.expand_path('../example', File.dirname(__FILE__))
+  STDERR.write "> reading input from TODO\n"
+  input = 'ich sah ein kleines haus'.split
+  #input = 'lebensmittel schuld an europäischer inflation'.split
+  #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
+  n = input.size
+
+  STDERR.write "> reading grammar\n"
+  grammar = Grammar::Grammar.new File.join(examples, 'toy/grammar')
+  #grammar = Grammar::Grammar.new File.join(examples, 'glue/grammar')
+  #grammar = Grammar::Grammar.new File.join(examples, '3/grammar.3.gz')
+
+  # optional grammar extensions, disabled together with their log messages
+  #STDERR.write ">> adding glue grammar\n"
+  #grammar.add_glue_rules
+  #STDERR.write ">> adding pass-through grammar\n"
+  #grammar.add_pass_through_rules input
+
+  STDERR.write "> initializing charts\n"
+  passive_chart = Parse::Chart.new n
+  active_chart = Parse::Chart.new n
+  Parse::init input, n, active_chart, passive_chart, grammar
+  STDERR.write "> parsing\n"
+  Parse::parse input, n, active_chart, passive_chart, grammar
+
+  puts "\n---\npassive chart"
+  Parse::visit(1, 0, n) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts " #{j} #{item.to_s}" }; puts }
+
+  weights_file = File.join(examples, 'toy/weights')
+  #weights_file = File.join(examples, 'glue/weights')
+  #weights_file = File.join(examples, '3/weights.init')
+  weights = SparseVector.from_kv(ReadFile.read(weights_file), ' ', "\n") || SparseVector.new
+  puts passive_chart.to_json weights
+end
+
+
+main
+
diff --git a/test_hg.rb b/test_hg.rb
deleted file mode 100755
index 14fe011..0000000
--- a/test_hg.rb
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env ruby
-
-require_relative 'hg'
-
-
-# viterbi
-semiring = ViterbiSemiring.new
-hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/toy/toy.json', semiring, true)
-#hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/glue/glue.json', semiring, true)
-#hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/3/3.json', semiring, true)
-path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
-s = HG::derive path, path.last.head, []
-puts "#{s.map { |i| i.word }.join ' '}"
-puts Math.log score
-puts
-
-# all paths
-hypergraph.reset
-paths = HG::all_paths hypergraph, nodes_by_id[-1]
-paths.each_with_index { |p,i|
- s = HG::derive p, p.last.head, []
- puts "#{i+1}. #{s.map { |x| x.word }.join ' '}"
-}
-
diff --git a/test_parse.rb b/test_parse.rb
deleted file mode 100755
index 835b08a..0000000
--- a/test_parse.rb
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/env ruby
-
-require_relative 'parse'
-
-
-def main
- STDERR.write "> reading input from TODO\n"
- #input = 'ich sah ein kleines haus'.split
- #input = 'lebensmittel schuld an europäischer inflation'.split
- input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
- n = input.size
-
- STDERR.write "> reading grammar\n"
- #grammar = Grammar::Grammar.new 'example/toy/grammar'
- #grammar = Grammar::Grammar.new 'example/glue/grammar'
- grammar = Grammar::Grammar.new 'example/3/grammar.3.gz'
-
- STDERR.write ">> adding glue grammar\n"
- #grammar.add_glue_rules
-
- STDERR.write ">> adding pass-through grammar\n"
- #grammar.add_pass_through_rules input
-
- STDERR.write "> initializing charts\n"
- passive_chart = Parse::Chart.new n
- active_chart = Parse::Chart.new n
- Parse::init input, n, active_chart, passive_chart, grammar
-
- STDERR.write "> parsing\n"
- Parse::parse input, n, active_chart, passive_chart, grammar
-
- #puts "\n---\npassive chart"
- #Parse::visit(1, 0, 5) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts " #{j} #{item.to_s}" }; puts }
-
- weights_file = 'example/toy/weights'
- #weights_file = 'example/glue/weights'
- #weights_file = 'example/3/weights.init'
- weights = SparseVector.from_kv(ReadFile.read(weights_file), ' ', "\n")
- if !weights
- weights = SparseVector.new
- end
- puts passive_chart.to_json weights
-end
-
-
-main
-
--
cgit v1.2.3