From 77666a09c0f82b231605da463a946a5a5fcd09b6 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 24 Feb 2026 17:07:57 +0100 Subject: Fix reordering bug in derive and add test example derive used a sequential counter to index into the source-side NT map, which only worked for monotone rules. Now looks up tails by the target NT's own index via map.index(i.index). Adds toy-reorder example (German verb-final -> English SVO) to exercise the fix. Also updates trollop -> optimist and guards xmlsimple require. Co-Authored-By: Claude Opus 4.6 --- prototype/hypergraph.rb | 4 +--- prototype/ow_proto.rb | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'prototype') diff --git a/prototype/hypergraph.rb b/prototype/hypergraph.rb index fd72393..fdaba5a 100644 --- a/prototype/hypergraph.rb +++ b/prototype/hypergraph.rb @@ -196,11 +196,9 @@ def HG::derive path, cur, carry edge = path.select { |e| e.head.symbol==cur.symbol \ && e.head.left==cur.left \ && e.head.right==cur.right }.first - j = 0 edge.rule.target.each { |i| if i.class == Grammar::NT - derive path, edge.tails[edge.rule.map[j]], carry - j += 1 + derive path, edge.tails[edge.rule.map.index(i.index)], carry else carry << i end diff --git a/prototype/ow_proto.rb b/prototype/ow_proto.rb index 912090b..41fe683 100755 --- a/prototype/ow_proto.rb +++ b/prototype/ow_proto.rb @@ -1,7 +1,7 @@ #!/usr/bin/env ruby -require 'trollop' -require 'xmlsimple' +require 'optimist' +begin; require 'xmlsimple'; rescue LoadError; end require_relative 'parse' def read_grammar fn, add_glue, add_pass_through, input=nil @@ -19,7 +19,7 @@ def read_grammar fn, add_glue, add_pass_through, input=nil end def main - cfg = Trollop::options do + cfg = Optimist::options do opt :input, "", :type => :string, :default => '-', :short => '-i' opt :grammar, "", :type => :string, :required => true, :short => '-g' opt :weights, "", :type => :string, :required => true, :short => '-w' -- cgit v1.2.3 From e76951f21263eb7010a2898b9744364e989e90b8 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 24 Feb 2026 17:14:37 +0100 Subject: Deduplicate all_paths by reachable edge set The Cartesian product over all nodes produces duplicate derivations when edges differ only in nodes unreachable from the top. Walk reachable edges from the top edge of each path and drop paths with identical reachable sets. Co-Authored-By: Claude Opus 4.6 --- prototype/hypergraph.rb | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'prototype') diff --git a/prototype/hypergraph.rb b/prototype/hypergraph.rb index fdaba5a..08d1a29 100644 --- a/prototype/hypergraph.rb +++ b/prototype/hypergraph.rb @@ -189,7 +189,21 @@ def HG::all_paths hypergraph, root paths = new_paths } - return paths + seen = Set.new + paths.select { |p| + reachable = Set.new + mark_reachable p, p.last, reachable + key = reachable.map(&:object_id).sort + !seen.include?(key) && seen.add(key) + } +end + +def HG::mark_reachable path, edge, used + used << edge + edge.tails.each { |t| + child = path.find { |e| e.head == t } + mark_reachable path, child, used if child && !used.include?(child) + } end def HG::derive path, cur, carry -- cgit v1.2.3 From 22dc0fbdf002c7824941abc17a715a3e70ff37c1 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 24 Feb 2026 17:16:06 +0100 Subject: Emit binary glue rule only once The [S] -> [S] [X] concatenation rule was duplicated for every non-S LHS symbol. Move it out of the loop so it's added once. Co-Authored-By: Claude Opus 4.6 --- prototype/grammar.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'prototype') diff --git a/prototype/grammar.rb b/prototype/grammar.rb index abccb15..4aebd95 100644 --- a/prototype/grammar.rb +++ b/prototype/grammar.rb @@ -120,9 +120,9 @@ class Grammar @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol| @rules << Rule.new(NT.new('S'), [NT.new(symbol, 0)], [NT.new(symbol, 0)], [0]) @start_nt << @rules.last - @rules << Rule.new(NT.new('S'), [NT.new('S', 0), NT.new('X', 1)], [NT.new('S', 0), NT.new('X', 1)], [0, 1]) - @start_nt << @rules.last } + @rules << Rule.new(NT.new('S'), [NT.new('S', 0), NT.new('X', 1)], [NT.new('S', 0), NT.new('X', 1)], [0, 1]) + @start_nt << @rules.last end def add_pass_through_rules a -- cgit v1.2.3 From 4e62908a1757f83ff703399252ad50758c4eb237 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 24 Feb 2026 17:18:29 +0100 Subject: Replace silent rescue with explicit type check in Item constructor When creating an Item from a Rule (not an Item), tail_spans doesn't exist. Check with is_a?(Item) instead of catching the exception silently. Co-Authored-By: Claude Opus 4.6 --- prototype/parse.rb | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'prototype') diff --git a/prototype/parse.rb b/prototype/parse.rb index adf2b91..40a69e7 100644 --- a/prototype/parse.rb +++ b/prototype/parse.rb @@ -90,14 +90,10 @@ class Item < Grammar::Rule rule_or_item.rhs.each_with_index { |x,i| # duplicate rhs partially @rhs << x if x.class == Grammar::NT - begin - if i >= dot - @tail_spans[i] = Span.new(-1, -1) - else - @tail_spans[i] = rule_or_item.tail_spans[i].dup - end - rescue + if i >= dot || !rule_or_item.is_a?(Item) @tail_spans[i] = Span.new(-1, -1) + else + @tail_spans[i] = rule_or_item.tail_spans[i].dup end end } -- cgit v1.2.3 From 44f225d0642d2ecf13f533f68b9ae12d849809ea Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 26 Feb 2026 19:29:18 +0100 Subject: Fix two bugs in prototype parser 1. Inner visit at span (0,1) yielded no sub-spans because visit(1, 0, 1, 1) iterates span from 1 to r-x=0, which is empty. This prevented unary rules like [S] ||| [X,1] from completing at the leftmost span, so S(0,1) was never created. Drop the x=1 parameter (default x=0); scan already handles bounds checking. 2. Self-filling step searched remaining_items for unary NT rules, but those rules could be absent if consumed (advanced) during the parse loop. Look up grammar.start_nt directly instead, which covers all cases. Co-Authored-By: Claude Opus 4.6 --- prototype/parse.rb | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'prototype') diff --git a/prototype/parse.rb b/prototype/parse.rb index 40a69e7..1741030 100644 --- a/prototype/parse.rb +++ b/prototype/parse.rb @@ -152,7 +152,7 @@ def Parse::parse input, n, active_chart, passive_chart, grammar while !active_chart.at(i,j).empty? active_item = active_chart.at(i,j).pop advanced = false - visit(1, i, j, 1) { |k,l| + visit(1, i, j) { |k,l| if passive_chart.has active_item.rhs[active_item.dot].symbol, k, l if k == active_item.right new_item = Item.new active_item, active_item.left, l, active_item.dot+1 @@ -182,16 +182,15 @@ def Parse::parse input, n, active_chart, passive_chart, grammar # 'self-filling' step new_symbols.each { |s| - remaining_items.each { |item| - next if item.dot!=0 - next if item.rhs[item.dot].class!=Grammar::NT - if item.rhs[item.dot].symbol == s - new_item = Item.new item, i, j, item.dot+1 - new_item.tail_spans[new_item.dot-1] = Span.new(i,j) - if new_item.dot==new_item.rhs.size - new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol - passive_chart.add new_item, i, j - end + grammar.start_nt.each { |r| + next if r.rhs.size > j-i + next if r.rhs.first.class!=Grammar::NT + next if r.rhs.first.symbol != s + new_item = Item.new r, i, j, 1 + new_item.tail_spans[0] = Span.new(i,j) + if new_item.dot==new_item.rhs.size + new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol + passive_chart.add new_item, i, j end } } -- cgit v1.2.3