summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--example/3/cdec.ini2
-rw-r--r--example/3/in.sgm1
-rw-r--r--example/glue/in1
-rw-r--r--example/glue/in.sgm1
-rw-r--r--example/toy/in.sgm1
-rwxr-xr-xmain.rb75
-rw-r--r--parse.rb36
-rwxr-xr-xtest/hg.rb29
-rwxr-xr-xtest/parse.rb (renamed from test_parse.rb)22
-rwxr-xr-xtest_hg.rb24
10 files changed, 156 insertions, 36 deletions
diff --git a/example/3/cdec.ini b/example/3/cdec.ini
index ee65b4e..4491e78 100644
--- a/example/3/cdec.ini
+++ b/example/3/cdec.ini
@@ -1,5 +1,5 @@
formalism=scfg
intersection_strategy=full
-grammar=grammars/grammar.3.gz
+grammar=grammar.3.gz
#add_pass_through_rules=true
diff --git a/example/3/in.sgm b/example/3/in.sgm
new file mode 100644
index 0000000..f6fde68
--- /dev/null
+++ b/example/3/in.sgm
@@ -0,0 +1 @@
+<seg id='0' grammar='example/3/grammar.3.gz'>offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .</seg>
diff --git a/example/glue/in b/example/glue/in
new file mode 100644
index 0000000..2d49127
--- /dev/null
+++ b/example/glue/in
@@ -0,0 +1 @@
+lebensmittel schuld an europäischer inflation
diff --git a/example/glue/in.sgm b/example/glue/in.sgm
new file mode 100644
index 0000000..2f1a89b
--- /dev/null
+++ b/example/glue/in.sgm
@@ -0,0 +1 @@
+<seg id='0' grammar='example/glue/grammar'>lebensmittel schuld an europäischer inflation</seg>
diff --git a/example/toy/in.sgm b/example/toy/in.sgm
new file mode 100644
index 0000000..561d346
--- /dev/null
+++ b/example/toy/in.sgm
@@ -0,0 +1 @@
+<seg id='0' grammar='example/toy/grammar'>ich sah ein kleines haus</seg>
diff --git a/main.rb b/main.rb
new file mode 100755
index 0000000..229454a
--- /dev/null
+++ b/main.rb
@@ -0,0 +1,75 @@
+#!/usr/bin/env ruby
+
+require 'trollop'
+require 'xmlsimple'
+require_relative 'parse'
+
+
+# Loads the grammar stored in +fn+ and optionally extends it.
+#
+# fn               - path handed to Grammar::Grammar.new
+# add_glue         - when truthy, glue rules are added to the grammar
+# add_pass_through - when truthy, pass-through rules are added for +input+
+# input            - the input the pass-through rules are built for (main
+#                    passes the tokenized sentence); optional, but without
+#                    it no pass-through rules are added
+#
+# Returns the loaded Grammar::Grammar.
+def read_grammar fn, add_glue, add_pass_through, input=nil
+  STDERR.write "> reading grammar '#{fn}'\n"
+  grammar = Grammar::Grammar.new fn
+  if add_glue
+    STDERR.write ">> adding glue grammar\n"
+    grammar.add_glue_rules
+  end
+  # +input+ used to be an undefined name in this method (NameError as soon
+  # as pass-through rules were requested); it is an explicit parameter now.
+  if add_pass_through && input
+    STDERR.write ">> adding pass-through grammar\n"
+    grammar.add_pass_through_rules input
+  end
+  return grammar
+end
+
+# Command line entry point: parses every input line with the selected
+# grammar, converts the passive chart into a hypergraph and prints the
+# Viterbi derivation followed by the log of its score.
+#
+# Each input line is parsed with XmlSimple; its 'content' is the sentence and
+# an optional 'grammar' attribute names the grammar to use from then on.
+def main
+  cfg = Trollop::options do
+    opt :input, "", :type => :string, :default => '-', :short => '-i'
+    opt :grammar, "", :type => :string, :default => nil, :short => '-g'
+    opt :weights, "", :type => :string, :default => nil, :short => '-w'
+    opt :add_glue, "", :type => :bool, :default => false, :short => '-h'
+    opt :add_pass_through, "", :type => :bool, :default => false, :short => '-p'
+  end
+
+  # The weights do not depend on the input, so they are loaded once up
+  # front. --weights defaults to nil; in that case nothing is read and an
+  # empty vector is used instead.
+  weights = nil
+  if cfg[:weights]
+    weights = SparseVector.from_kv(ReadFile.read(cfg[:weights]), ' ', "\n")
+  end
+  if !weights
+    weights = SparseVector.new
+  end
+
+  # The grammar is loaded lazily inside the loop because pass-through rules
+  # need the sentence they are built for.
+  grammar_fn = cfg[:grammar]
+  grammar = nil
+
+  STDERR.write "> reading input from '#{cfg[:input]}'\n"
+  ReadFile.readlines_strip(cfg[:input]).each { |line|
+
+    x = XmlSimple.xml_in(line)
+    input = x['content'].split
+    n = input.size
+
+    # A grammar named on the segment replaces the current one and stays in
+    # effect for the following segments.
+    grammar_fn = x['grammar'] if x['grammar']
+    if !grammar_fn
+      raise ArgumentError, "no grammar given: use --grammar or a grammar attribute on the input segment"
+    end
+    # (Re)load on first use, whenever the segment names a grammar, and for
+    # every sentence when pass-through rules are requested.
+    if !grammar || x['grammar'] || cfg[:add_pass_through]
+      grammar = read_grammar grammar_fn, cfg[:add_glue], cfg[:add_pass_through], input
+    end
+
+    STDERR.write "> initializing charts\n"
+    passive_chart = Parse::Chart.new n
+    active_chart = Parse::Chart.new n
+    Parse::init input, n, active_chart, passive_chart, grammar
+
+    STDERR.write "> parsing\n"
+    Parse::parse input, n, active_chart, passive_chart, grammar
+
+    hypergraph, nodes_by_id = passive_chart.to_hg weights
+
+    STDERR.write "> viterbi\n"
+    semiring = ViterbiSemiring.new
+    path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
+    s = HG::derive path, path.last.head, []
+    puts "#{s.map { |i| i.word }.join ' '}"
+    puts Math.log score
+    puts
+
+  }
+end
+
+
+main
+
diff --git a/parse.rb b/parse.rb
index b39b456..f1ad2c2 100644
--- a/parse.rb
+++ b/parse.rb
@@ -1,5 +1,6 @@
require 'zipf'
require_relative 'grammar'
+require_relative 'hg'
module Parse
@@ -82,6 +83,41 @@ class Chart
return json_s
end
+
+ # Converts the chart into a hypergraph.
+ #
+ # One node is created per distinct (lhs symbol, i, j) found in the chart,
+ # plus a node with id -1 labelled "root". Every chart item becomes one
+ # hyperedge whose head is the node of the item's lhs and span, and whose
+ # tails are the nodes of the item's non-terminal rhs symbols (or the root
+ # node when the item has no tail spans). The edge weight is
+ # Math.exp(weights.dot(item.f)).
+ #
+ # weights - object responding to dot(item.f)
+ #
+ # Returns [HG::Hypergraph, nodes_by_id], nodes_by_id mapping id => node.
+ def to_hg weights
+   root = HG::Node.new(-1, "root", [-1,-1])
+   nodes = [root]
+   nodes_by_id = { -1 => root }
+   edges = []
+   id_of = {}
+   # key that identifies a node by its symbol and span
+   span_key = lambda { |symbol, left, right| "#{symbol},#{left},#{right}" }
+
+   # first pass: one node per distinct (symbol, span), ids in visiting order
+   Parse::visit(1, 0, @n) { |i,j|
+     self.at(i,j).each { |item|
+       key = span_key.call(item.lhs.symbol, i, j)
+       next if id_of.key?(key)
+       node = HG::Node.new(id_of.size, item.lhs.symbol, [i,j])
+       nodes << node
+       nodes_by_id[node_id = id_of.size] = node
+       id_of[key] = node_id
+     }
+   }
+
+   # second pass: one hyperedge per item, linked into its head and tails
+   Parse::visit(1, 0, @n) { |i,j|
+     self.at(i,j).each { |item|
+       head = nodes_by_id[id_of[span_key.call(item.lhs.symbol, i, j)]]
+       if item.tail_spans.empty?
+         tails = [root]
+       else
+         non_terminals = item.rhs.each_with_index.select { |sym, _| sym.class==Grammar::NT }
+         tails = non_terminals.map { |sym, q|
+           span = item.tail_spans[q]
+           nodes_by_id[id_of[span_key.call(sym.symbol, span.left, span.right)]]
+         }
+       end
+       weight = Math.exp(weights.dot(item.f))
+       rule = Grammar::Rule.new(item.lhs, item.rhs, item.target, item.map, item.f)
+       edge = HG::Hyperedge.new(head, tails, weight, item.f, rule)
+       edges << edge
+       edge.head.incoming << edge
+       edge.tails.each { |tail| tail.outgoing << edge }
+     }
+   }
+
+   return HG::Hypergraph.new(nodes, edges), nodes_by_id
+ end
end
Span = Struct.new(:left, :right)
diff --git a/test/hg.rb b/test/hg.rb
new file mode 100755
index 0000000..16e6b6f
--- /dev/null
+++ b/test/hg.rb
@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+
+require_relative '../hg'
+
+
+# Reads a hypergraph from JSON, prints its Viterbi derivation together with
+# the log of the Viterbi score, then resets the hypergraph and prints every
+# path as a numbered derivation.
+def main
+  semiring = ViterbiSemiring.new
+  json = '../example/toy/toy.json'
+  #json = '../example/glue/glue.json'
+  #json = '../example/3/3.json'
+  hypergraph, nodes_by_id = HG::read_hypergraph_from_json(json, semiring, true)
+  start_node = nodes_by_id[-1]
+
+  # viterbi
+  best_path, best_score = HG::viterbi_path hypergraph, start_node, semiring
+  derivation = HG::derive best_path, best_path.last.head, []
+  puts derivation.map(&:word).join(' ')
+  puts Math.log best_score
+  puts
+
+  # all paths
+  hypergraph.reset
+  HG::all_paths(hypergraph, start_node).each.with_index(1) { |path, rank|
+    derivation = HG::derive path, path.last.head, []
+    puts "#{rank}. #{derivation.map(&:word).join ' '}"
+  }
+end
+
+
+main
+
diff --git a/test_parse.rb b/test/parse.rb
index 835b08a..e139ea4 100755
--- a/test_parse.rb
+++ b/test/parse.rb
@@ -1,19 +1,19 @@
#!/usr/bin/env ruby
-require_relative 'parse'
+require_relative '../parse'
def main
STDERR.write "> reading input from TODO\n"
- #input = 'ich sah ein kleines haus'.split
+ input = 'ich sah ein kleines haus'.split
#input = 'lebensmittel schuld an europäischer inflation'.split
- input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
+ #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
n = input.size
STDERR.write "> reading grammar\n"
- #grammar = Grammar::Grammar.new 'example/toy/grammar'
- #grammar = Grammar::Grammar.new 'example/glue/grammar'
- grammar = Grammar::Grammar.new 'example/3/grammar.3.gz'
+ grammar = Grammar::Grammar.new '../example/toy/grammar'
+ #grammar = Grammar::Grammar.new '../example/glue/grammar'
+ #grammar = Grammar::Grammar.new '../example/3/grammar.3.gz'
STDERR.write ">> adding glue grammar\n"
#grammar.add_glue_rules
@@ -29,12 +29,12 @@ def main
STDERR.write "> parsing\n"
Parse::parse input, n, active_chart, passive_chart, grammar
- #puts "\n---\npassive chart"
- #Parse::visit(1, 0, 5) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts " #{j} #{item.to_s}" }; puts }
+ puts "\n---\npassive chart"
+ Parse::visit(1, 0, 5) { |i,j| puts "#{i},#{j}"; passive_chart.at(i,j).each { |item| puts " #{j} #{item.to_s}" }; puts }
- weights_file = 'example/toy/weights'
- #weights_file = 'example/glue/weights'
- #weights_file = 'example/3/weights.init'
+ weights_file = '../example/toy/weights'
+ #weights_file = '../example/glue/weights'
+ #weights_file = '../example/3/weights.init'
weights = SparseVector.from_kv(ReadFile.read(weights_file), ' ', "\n")
if !weights
weights = SparseVector.new
diff --git a/test_hg.rb b/test_hg.rb
deleted file mode 100755
index 14fe011..0000000
--- a/test_hg.rb
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env ruby
-
-require_relative 'hg'
-
-
-# viterbi
-semiring = ViterbiSemiring.new
-hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/toy/toy.json', semiring, true)
-#hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/glue/glue.json', semiring, true)
-#hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/3/3.json', semiring, true)
-path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
-s = HG::derive path, path.last.head, []
-puts "#{s.map { |i| i.word }.join ' '}"
-puts Math.log score
-puts
-
-# all paths
-hypergraph.reset
-paths = HG::all_paths hypergraph, nodes_by_id[-1]
-paths.each_with_index { |p,i|
- s = HG::derive p, p.last.head, []
- puts "#{i+1}. #{s.map { |x| x.word }.join ' '}"
-}
-