summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-01 23:26:43 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-01 23:26:43 +0200
commit 3b0d4622865e71c49c4efc4db4ac35538a373a47 (patch)
tree 722ab034f381a357e84d0bf151928ea3db296a80
parent af62fcca3729c9763c374520e1665f782bbbdf42 (diff)
externalize grammar.rb
-rw-r--r-- README.md 5
-rw-r--r-- animate.rb 17
-rw-r--r-- example/glue (renamed from example/grammar.x) 0
-rw-r--r-- example/grammar-test 22
-rw-r--r-- grammar.rb 111
-rw-r--r-- hg.rb 0
-rw-r--r-- parse.rb 47
7 files changed, 46 insertions, 156 deletions
diff --git a/README.md b/README.md
index 460adde..98065f1 100644
--- a/README.md
+++ b/README.md
@@ -4,3 +4,8 @@ nothing to see here
helpful stuff
* https://github.com/jweese/thrax/wiki/Glue-grammar
+todo
+ * animate?
+ * to json
+ * integrate with HG
+
diff --git a/animate.rb b/animate.rb
deleted file mode 100644
index 558146f..0000000
--- a/animate.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-s = 'ich sah ein kleines haus .'
-def visit n, depth, skip=0 # FIXME
- (depth-skip).times { |i|
- i += skip
- 0.upto(n-(i+1)) { |j|
- yield j, j+i+1
- }
- }
-end
-
-0,5
-
-
-1 2
-0,1 1,2 2,3 3,4 4,5
-ich sah ein kleines haus
-
diff --git a/example/grammar.x b/example/glue
index 7ff74b5..7ff74b5 100644
--- a/example/grammar.x
+++ b/example/glue
diff --git a/example/grammar-test b/example/grammar-test
new file mode 100644
index 0000000..f49b999
--- /dev/null
+++ b/example/grammar-test
@@ -0,0 +1,22 @@
+[S] ||| [B,1] ||| [1] ||| logp=0
+[S] ||| ich [V,1] ein [JJ,2] haus ||| i [1] a [2] |||
+[S] ||| ich sah ein kleines haus ||| i saw a small house |||
+[S] ||| ich sah ein [JJ,1] ||| i saw a [1] |||
+[B] ||| [C,1] ||| [1] ||| logp=0
+[C] ||| [Q,1] ||| [1] ||| logp=0
+[Q] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0
+[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0
+[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 use_a=1.0
+[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1
+[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1
+[JJ] ||| [H,1] ||| [1] |||
+[H] ||| kleines [Z,1] ||| small [1] |||
+[Z] ||| [I,1] ||| [1] |||
+[I] ||| haus ||| house |||
+[JJ] ||| kleines ||| small ||| logp=0 use_small=1.0
+[JJ] ||| kleines ||| little ||| logp=0 use_little=1.0
+[JJ] ||| grosses ||| big ||| logp=0
+[JJ] ||| grosses ||| large ||| logp=0
+[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0
+[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0
+[V] ||| fand ||| found ||| logp=0
diff --git a/grammar.rb b/grammar.rb
deleted file mode 100644
index a674a7b..0000000
--- a/grammar.rb
+++ /dev/null
@@ -1,111 +0,0 @@
-require 'nlp_ruby'
-
-
-class T
- attr_accessor :word
-
- def initialize word
- @word = word
- end
-
- def to_s
- "T<#{@word}>"
- end
-end
-
-class NT
- attr_accessor :symbol, :index, :span
-
- def initialize symbol, index=0
- @symbol = symbol
- @index = index
- @span = Span.new
- end
-
- def to_s
- "NT(#{@span.left},#{@span.right})<#{@symbol},#{@index}>"
- end
-end
-
-
-
-class Rule
- attr_accessor :lhs, :rhs
-
- def initialize lhs=nil, rhs=[]
- @lhs = lhs
- @rhs = rhs
- end
-
- def to_s
- "#{lhs} -> #{rhs.map{ |i| i.to_s }.join ' '} [arity=#{arity}]"
- end
-
- def arity
- rhs.select { |i| i.class == NT }.size
- end
-
- def from_s s
- _ = splitpipe s, 3
- @lhs = NT.new _[0].strip.gsub!(/(\[|\])/, "")
- _[1].split.each { |x|
- x.strip!
- if x[0]=='[' && x[x.size-1] == ']'
- @rhs << NT.new(x.gsub!(/(\[|\])/, "").split(',')[0])
- else
- @rhs << T.new(x)
- end
- }
- end
-
- def self.from_s s
- r = self.new
- r.from_s s
- return r
- end
-end
-
-class Grammar
- attr_accessor :rules, :startn, :startt, :flat
-
- def initialize fn
- @rules = []; @startn = []; @startt = [] ;@flat = []
- ReadFile.readlines_strip(fn).each_with_index { |s,i|
- STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0
- @rules << Rule.from_s(s)
- if @rules.last.rhs.first.class == NT
- @startn << @rules.last
- else
- if rules.last.arity == 0
- @flat << @rules.last
- else
- @startt << @rules.last
- end
- end
- }
- STDERR.write "\n"
- end
-
- def to_s
- s = ''
- @rules.each { |r| s += r.to_s+"\n" }
- return s
- end
-
- def add_glue_rules
- @rules.map { |r| r.lhs.symbol }.select { |s| s != 'S' }.uniq.each { |symbol|
- @rules << Rule.new(NT.new('S'), [NT.new(symbol)])
- @startn << @rules.last
- @rules << Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
- @startn << @rules.last
- }
- end
-
- def add_pass_through_rules s
- s.each { |word|
- @rules << Rule.new(NT.new('X'), [T.new(word)])
- @flat << @rules.last
- }
- end
-end
-
diff --git a/hg.rb b/hg.rb
deleted file mode 100644
index e69de29..0000000
--- a/hg.rb
+++ /dev/null
diff --git a/parse.rb b/parse.rb
index 3ea4fc0..fa3826b 100644
--- a/parse.rb
+++ b/parse.rb
@@ -1,6 +1,6 @@
#!/usr/bin/env ruby
-require_relative './grammar.rb'
+require 'nlp_ruby'
class Chart
@@ -29,36 +29,28 @@ class Chart
end
end
-class Span
- attr_accessor :left, :right
-
- def initialize left=nil, right=nil
- @left = left
- @right = right
- end
-end
-
-class Item < Rule
- attr_accessor :lhs, :rhs, :dot
+class Item < Grammar::Rule
+ attr_accessor :lhs, :rhs, :dot, :e
def initialize rule_or_item, left, right, dot
- @lhs = NT.new rule_or_item.lhs.symbol
- @lhs.span = Span.new left, right
+ @lhs = Grammar::NT.new rule_or_item.lhs.symbol
+ @lhs.span = Grammar::Span.new left, right
@rhs = []
rule_or_item.rhs.each { |x|
- if x.class == T
- @rhs << T.new(x.word)
+ if x.class == Grammar::T
+ @rhs << Grammar::T.new(x.word)
end
- if x.class == NT
- @rhs << NT.new(x.symbol)
- @rhs.last.span = Span.new x.span.left, x.span.right
+ if x.class == Grammar::NT
+ @rhs << Grammar::NT.new(x.symbol)
+ @rhs.last.span = Grammar::Span.new x.span.left, x.span.right
end
}
@dot = dot
+ @e = rule_or_item.e
end
def to_s
- "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@lhs.span.left}, #{@lhs.span.right})"
+ "#{lhs} -> #{rhs.map{|i|i.to_s}.insert(@dot,'*').join ' '} [dot@#{@dot}] [arity=#{arity}] (#{@lhs.span.left}, #{@lhs.span.right}) ||| #{@e}"
end
end
@@ -73,7 +65,7 @@ def init input, n, active_chart, passive_chart, grammar
end
def scan item, input, limit, passive_chart
- while item.rhs[item.dot].class == T
+ while item.rhs[item.dot].class == Grammar::T
return false if item.lhs.span.right==limit
if item.rhs[item.dot].word == input[item.lhs.span.right]
item.dot += 1
@@ -120,7 +112,7 @@ def parse input, n, active_chart, passive_chart, grammar
if passive_chart.has active_item.rhs[active_item.dot].symbol, k, l
if k == active_item.lhs.span.right
new_item = Item.new active_item, active_item.lhs.span.left, l, active_item.dot+1
- new_item.rhs[new_item.dot-1].span = Span.new k, l
+ new_item.rhs[new_item.dot-1].span = Grammar::Span.new k, l
if scan new_item, input, j, passive_chart
if new_item.dot == new_item.rhs.size
if new_item.lhs.span.left == i && new_item.lhs.span.right == j
@@ -145,13 +137,12 @@ def parse input, n, active_chart, passive_chart, grammar
# 'self-filling' step
new_symbols.each { |s|
- puts new_symbols.to_s if i==2&&j==5
remaining_items.each { |active_item|
next if active_item.dot!=0
- next if active_item.rhs[active_item.dot].class!=NT
+ next if active_item.rhs[active_item.dot].class!=Grammar::NT
if active_item.rhs[active_item.dot].symbol == s
new_item = Item.new active_item, i, j, active_item.dot+1
- new_item.rhs[new_item.dot-1].span = Span.new i, j
+ new_item.rhs[new_item.dot-1].span = Grammar::Span.new i, j
if new_item.dot==new_item.rhs.size
new_symbols << new_item.lhs.symbol if !new_symbols.include? new_item.lhs.symbol
passive_chart.add new_item, i, j
@@ -165,13 +156,13 @@ end
def main
STDERR.write "> reading input from TODO\n"
- input = 'ich sah ein kleines haus'.split
+ #input = 'ich sah ein kleines haus'.split
#input = 'lebensmittel schuld an europäischer inflation'.split
- #input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
+ input = 'offizielle prognosen sind von nur 3 prozent ausgegangen , meldete bloomberg .'.split
n = input.size
STDERR.write "> reading grammar\n"
- grammar = Grammar.new 'example/grammarx'
+ grammar = Grammar::Grammar.new 'example/grammar.3.gz'
STDERR.write ">> adding glue grammar\n"
#grammar.add_glue_rules
STDERR.write ">> adding pass-through grammar\n"