summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-12 16:25:33 +0200
committerPatrick Simianer <p@simianer.de>2014-06-12 16:25:33 +0200
commit1651cfb83e0d95f9ada0b9acae6ede84e605720b (patch)
tree68def18ad3dd78893e5d5495e479010de7101dde
parente834fd628f61aca04f98691eb56e9808c33c6787 (diff)
save some memory
-rw-r--r--README.md7
-rw-r--r--grammar.rb32
-rw-r--r--hg.rb4
-rw-r--r--parse.rb69
-rwxr-xr-xtest_hg.rb7
-rwxr-xr-xtest_parse.rb2
6 files changed, 64 insertions, 57 deletions
diff --git a/README.md b/README.md
index edf89bd..fa50235 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,9 @@ helpful stuff
todo
====
- * animate parsing?
- * integrate with HG
+ * integrate with HG (chart to json)
+ * kbest
+ * feature interface
+ * (global) word ids instead of strings
+ * animate parsing
diff --git a/grammar.rb b/grammar.rb
index a8ec07c..6c95de2 100644
--- a/grammar.rb
+++ b/grammar.rb
@@ -41,11 +41,11 @@ end
class Rule
attr_accessor :lhs, :rhs, :target, :map, :f
- def initialize lhs=nil, rhs=nil, target=nil, map=nil, f=SparseVector.new
+ def initialize lhs=NT.new, rhs=[], target=[], map=[], f=SparseVector.new
@lhs = lhs
@rhs = rhs
@target = target
- @map = (map ? map : [])
+ @map = map
@f = f
@arity_ = nil
end
@@ -59,30 +59,30 @@ class Rule
return @arity_
end
- def read_right_ s, fill_map=false
- _ = []
+ def read_right_ s, create_map=false
+ a = []
s.split.each { |x|
x.strip!
- if x[0]=='[' && x[x.size-1] == ']'
- _ << NT.from_s(x)
- @map << _.last.index if fill_map
+ if x[0] == '[' && x[x.size-1] == ']'
+ a << NT.from_s(x)
+ @map << a.last.index if create_map
else
- _ << T.new(x)
+ a << T.new(x)
end
}
- return _
+ return a
end
def from_s s
lhs, rhs, target, f = splitpipe s, 3
- @lhs = NT.from_s lhs
- @rhs = read_right_ rhs
+ @lhs = NT.from_s lhs
+ @rhs = read_right_ rhs
@target = read_right_ target, true
- @f = (f ? SparseVector.from_kv(f) : nil)
+ @f = (f ? SparseVector.from_kv(f) : nil)
end
- def self.from_s_x s
- r = self.new
+ def self.from_s s
+ r = Rule.new
r.from_s s
return r
end
@@ -110,9 +110,7 @@ class Grammar
end
def to_s
- s = ''
- @rules.each { |r| s += r.to_s+"\n" }
- return s
+ @rules.map { |r| r.to_s }.join "\n"
end
def add_glue_rules
diff --git a/hg.rb b/hg.rb
index b73d860..abd8777 100644
--- a/hg.rb
+++ b/hg.rb
@@ -127,6 +127,10 @@ def HG::viterbi_path hypergraph, root, semiring=ViterbiSemiring.new
return best_path, toposorted.last.score
end
+def HG::k_best hypergraph, root, semiring=nil
+ #TODO
+end
+
def HG::all_paths hypergraph, root
toposorted = topological_sort hypergraph.nodes
paths = [[]]
diff --git a/parse.rb b/parse.rb
index 454bb14..c9cbd7c 100644
--- a/parse.rb
+++ b/parse.rb
@@ -1,17 +1,23 @@
-#!/usr/bin/env ruby
-
require 'nlp_ruby'
require_relative 'grammar'
+def visit i, l, r, x=0
+ i.upto(r-x) { |span|
+ l.upto(r-span) { |k|
+ yield k, k+span
+ }
+ }
+end
+
class Chart
def initialize n
@m = []
(n+1).times {
- _ = []
- (n+1).times { _ << [] }
- @m << _
+ a = []
+ (n+1).times { a << [] }
+ @m << a
}
@b = {}
end
@@ -28,6 +34,10 @@ class Chart
def has symbol, i, j
return @b["#{i},#{j},#{symbol}"]
end
+
+ def to_json
+ #TODO
+ end
end
Span = Struct.new(:left, :right)
@@ -36,27 +46,28 @@ class Item < Grammar::Rule
attr_accessor :left, :right, :tail_spans, :dot, :f
def initialize rule_or_item, left, right, dot
- @lhs = Grammar::NT.new(rule_or_item.lhs.symbol, rule_or_item.lhs.index)
- @left = left
- @right = right
- @rhs = []
- @tail_spans = {}
- @f = rule_or_item.f
- @map = (rule_or_item.map ? rule_or_item.map.dup : [])
- rule_or_item.rhs.each_with_index { |x,i|
- if x.class == Grammar::T
- @rhs << Grammar::T.new(x.word)
- end
+ @lhs = Grammar::NT.new rule_or_item.lhs.symbol, rule_or_item.lhs.index
+ @left = left
+ @right = right
+ @rhs = []
+ @tail_spans = {} # refers to source side, use @map
+ @f = rule_or_item.f
+ @map = (rule_or_item.map ? rule_or_item.map.dup : [])
+ rule_or_item.rhs.each_with_index { |x,i| # duplicate rhs partially
+ @rhs << x
if x.class == Grammar::NT
- @rhs << Grammar::NT.new(x.symbol, x.index)
begin
- @tail_spans[i] = rule_or_item.tail_spans[i].dup
+ if i >= dot
+ @tail_spans[i] = Span.new(-1, -1)
+ else
+ @tail_spans[i] = rule_or_item.tail_spans[i].dup
+ end
rescue
@tail_spans[i] = Span.new(-1, -1)
end
end
}
- @dot = dot
+ @dot = dot
@target = rule_or_item.target
end
@@ -88,14 +99,6 @@ def scan item, input, limit, passive_chart
return true
end
-def visit i, l, r, x=0
- i.upto(r-x) { |span|
- l.upto(r-span) { |k|
- yield k, k+span
- }
- }
-end
-
def parse input, n, active_chart, passive_chart, grammar
visit(1, 0, n) { |i,j|
@@ -112,7 +115,7 @@ def parse input, n, active_chart, passive_chart, grammar
next if r.rhs.size > j-i
active_chart.at(i,j) << Item.new(r, i, i, 0)
}
-
+
# parse
new_symbols = []
remaining_items = []
@@ -148,11 +151,11 @@ def parse input, n, active_chart, passive_chart, grammar
# 'self-filling' step
new_symbols.each { |s|
- remaining_items.each { |active_item|
- next if active_item.dot!=0
- next if active_item.rhs[active_item.dot].class!=Grammar::NT
- if active_item.rhs[active_item.dot].symbol == s
- new_item = Item.new active_item, i, j, active_item.dot+1
+ remaining_items.each { |item|
+ next if item.dot!=0
+ next if item.rhs[item.dot].class!=Grammar::NT
+ if item.rhs[item.dot].symbol == s
+ new_item = Item.new item, i, j, item.dot+1
new_item.tail_spans[new_item.dot-1] = Span.new(i,j)
if new_item.dot==new_item.rhs.size
new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol
diff --git a/test_hg.rb b/test_hg.rb
index b72bd85..f4e2ef8 100755
--- a/test_hg.rb
+++ b/test_hg.rb
@@ -4,17 +4,16 @@ require_relative 'hg'
semiring = ViterbiSemiring.new
-hypergraph, nodes_by_id = HG::read_hypergraph_from_json('x.json', semiring, true)
+hypergraph, nodes_by_id = HG::read_hypergraph_from_json('example/3/3.json', semiring, true)
path, score = HG::viterbi_path hypergraph, nodes_by_id[-1], semiring
s = HG::derive path, path.last.head, []
puts "#{s.map { |i| i.word }.join ' '}"
puts Math.log score
puts
-
#hypergraph.reset
#paths = HG::all_paths hypergraph, nodes_by_id[-1]
-#paths.each { |p|
+#paths.each_with_index { |p,i|
# s = HG::derive p, p.last.head, []
-# puts "#{s.map { |i| i.word }.join ' '}"
+# puts "#{i+1}. #{s.map { |i| i.word }.join ' '}"
#}
diff --git a/test_parse.rb b/test_parse.rb
index 18afd55..0187675 100755
--- a/test_parse.rb
+++ b/test_parse.rb
@@ -50,7 +50,7 @@ def main
n = input.size
STDERR.write "> reading grammar\n"
- grammar = Grammar::Grammar.new 'example/grammars/grammar.3.gz'
+ grammar = Grammar::Grammar.new 'example/3/grammar.3.gz'
STDERR.write ">> adding glue grammar\n"
grammar.add_glue_rules
STDERR.write ">> adding pass-through grammar\n"