summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-01 23:24:00 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-01 23:24:00 +0200
commit 6e286d65e4b0b530b4dbc965fddccb65a1e1e11a (patch)
tree 9bec03ab04602db57ed4f4f9e2d664875df80219
parent 94fef2d3aac6d7380e92771c46cc0e4655afaea4 (diff)
argh
-rw-r--r-- lib/nlp_ruby/grammar.rb 122
-rw-r--r-- test/hg/hg-toy-with-target.json 31
2 files changed, 153 insertions, 0 deletions
diff --git a/lib/nlp_ruby/grammar.rb b/lib/nlp_ruby/grammar.rb
new file mode 100644
index 0000000..7bd8fe6
--- /dev/null
+++ b/lib/nlp_ruby/grammar.rb
@@ -0,0 +1,122 @@
+module Grammar
+
+# Terminal symbol of a grammar rule: wraps a single word.
+class T
+  attr_accessor :word
+
+  # word: the surface token this terminal stands for.
+  def initialize(word)
+    self.word = word
+  end
+
+  # Renders the terminal as "T<word>".
+  def to_s
+    'T<' + word.to_s + '>'
+  end
+end
+
+# Non-terminal symbol of a grammar rule.
+#
+# Holds a category symbol, an index (0 unless one is passed in) and a
+# freshly created Span.
+class NT
+  attr_accessor :symbol, :index, :span
+
+  def initialize(symbol, index = 0)
+    @symbol, @index = symbol, index
+    @span = Span.new
+  end
+
+  # Renders as "NT(left,right)<symbol,index>", taking left/right from the span.
+  def to_s
+    left, right = span.left, span.right
+    "NT(#{left},#{right})<#{symbol},#{index}>"
+  end
+end
+
+# A single grammar rule of the form "lhs -> rhs ||| e".
+#
+# lhs is an NT (nil until set or parsed), rhs is an array of T and NT
+# objects in source order, and e is the third field of the rule string,
+# stored as returned by splitpipe.
+class Rule
+  attr_accessor :lhs, :rhs, :e
+
+  def initialize lhs=nil, rhs=[], e=''
+    @lhs = lhs
+    @rhs = rhs
+    @e = e
+  end
+
+  # Renders as "lhs -> rhs items [arity=N] ||| e".
+  def to_s
+    "#{lhs} -> #{rhs.map(&:to_s).join ' '} [arity=#{arity}] ||| #{@e}"
+  end
+
+  # Number of right-hand-side items that are exactly of class NT.
+  def arity
+    rhs.count { |i| i.instance_of? NT }
+  end
+
+  # Fills this rule from a string such as "[NN] ||| [JJ,1] haus ||| [1] house".
+  #
+  # Right-hand-side tokens wrapped in square brackets become NT objects,
+  # every other token becomes a T. Only the text before the first comma of
+  # a bracketed token is used, so the index in "[JJ,1]" is discarded.
+  # Parsed items are appended to the existing rhs array.
+  #
+  # NOTE(review): splitpipe is not defined in this file; presumably it
+  # splits s on "|||" into three fields - confirm against its definition.
+  def from_s s
+    fields = splitpipe s, 3
+    # String#delete always returns a string. The previous gsub! returned nil
+    # when there was nothing to replace, which left the lhs symbol nil for
+    # an lhs written without brackets.
+    @lhs = NT.new fields[0].strip.delete('[]')
+    fields[1].split.each { |x|
+      if x.start_with?('[') && x.end_with?(']')
+        @rhs << NT.new(x.delete('[]').split(',')[0])
+      else
+        @rhs << T.new(x)
+      end
+    }
+    @e = fields[2]
+  end
+
+  # Builds a new Rule from s; see #from_s for the expected format.
+  def self.from_s s
+    r = new
+    r.from_s s
+    r
+  end
+end
+
+# Pair of boundaries (left, right); each is nil unless a value is supplied.
+class Span
+  attr_accessor :left, :right
+
+  def initialize(left = nil, right = nil)
+    @left, @right = left, right
+  end
+end
+
+# A list of rules read from a file, additionally bucketed by the shape of
+# each rule's right-hand side:
+#   startn - rules whose first rhs item is an NT
+#   startt - other rules that contain at least one NT
+#   flat   - other rules that contain no NT
+# Every rule is also kept, in insertion order, in rules.
+class Grammar
+  attr_accessor :rules, :startn, :startt, :flat
+
+  # Reads the lines of fn via ReadFile.readlines_strip, parses each one with
+  # Rule.from_s and files it into the matching bucket. Progress goes to
+  # STDERR: one dot per line, plus the running count and a line break after
+  # every 80th line.
+  def initialize fn
+    @rules = []; @startn = []; @startt = []; @flat = []
+    ReadFile.readlines_strip(fn).each_with_index { |s,i|
+      STDERR.write '.'; STDERR.write " #{i+1}\n" if (i+1)%80==0
+      add_rule Rule.from_s(s)
+    }
+    STDERR.write "\n"
+  end
+
+  # All rules, one per line, each line terminated by "\n".
+  def to_s
+    @rules.map { |r| "#{r}\n" }.join
+  end
+
+  # For every distinct lhs symbol other than 'S' currently in rules, adds
+  # the rules "S -> symbol" and "S -> S X" to rules and startn.
+  #
+  # NOTE(review): the "S -> S X" rule is added once per symbol and always
+  # uses 'X' rather than the symbol - confirm that this is intended.
+  def add_glue_rules
+    @rules.map { |r| r.lhs.symbol }.reject { |s| s == 'S' }.uniq.each { |symbol|
+      add_rule Rule.new(NT.new('S'), [NT.new(symbol)]), @startn
+      add_rule Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')]), @startn
+    }
+  end
+
+  # Adds a rule "X -> word" to rules and flat for every word in s.
+  def add_pass_through_rules s
+    s.each { |word|
+      add_rule Rule.new(NT.new('X'), [T.new(word)]), @flat
+    }
+  end
+
+  private
+
+  # Appends rule to rules and to bucket; without an explicit bucket the
+  # rule is classified by its right-hand side.
+  def add_rule rule, bucket=nil
+    @rules << rule
+    (bucket || bucket_for(rule)) << rule
+  end
+
+  # Picks startn, flat or startt for rule as described on the class.
+  def bucket_for rule
+    return @startn if rule.rhs.first.instance_of? NT
+    rule.arity == 0 ? @flat : @startt
+  end
+end
+
+
+end # module
+
diff --git a/test/hg/hg-toy-with-target.json b/test/hg/hg-toy-with-target.json
new file mode 100644
index 0000000..1fb4b3d
--- /dev/null
+++ b/test/hg/hg-toy-with-target.json
@@ -0,0 +1,31 @@
+{
+"weights":{
+"logp":2.0, "use_house":0.0, "use_shell":1.0
+},
+"nodes":
+[
+{ "label":"root", "cat":"root" },
+{ "label":"0", "cat":"NP" },
+{ "label":"1", "cat":"V" },
+{ "label":"2", "cat":"JJ" },
+{ "label":"3", "cat":"NN" },
+{ "label":"4", "cat":"NP" },
+{ "label":"5", "cat":"VP" },
+{ "label":"6", "cat":"S" },
+{ "label":"7", "cat":"Goal" }
+],
+"edges":
+[
+{"head":"0", "trule":"[NP] ||| ich ||| i ||| logp=-0.5 use_i=1.0", "left":0, "right":1, "tails":[ "root" ], "f":{"logp":-0.5, "use_i":1.0}, "weight":0.367879441171 },
+{"head":"1", "trule":"[V] ||| sah ||| saw ||| logp=-0.25 use_saw=1.0", "left":1, "right":2, "tails":[ "root" ], "f":{"logp":-0.25, "use_saw":1.0}, "weight":0.606530659713 },
+{"head":"2", "trule":"[JJ] ||| kleines ||| small ||| logp=0.0 use_small=1.0", "left":3, "right":4, "tails":[ "root" ], "f":{"logp":0.0, "use_small":1.0}, "weight":1.0 },
+{"head":"2", "trule":"[JJ] ||| kleines ||| little ||| logp=0.0 use_little=1.0", "left":3, "right":4, "tails":[ "root" ], "f":{"logp":0.0, "use_little":1.0}, "weight":1.0 },
+{"head":"3", "trule":"[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0.0 use_house=1.0", "left":3, "right":5, "tails":[ "2" ], "f":{"logp":0.0, "use_house":1.0}, "weight":1.0 },
+{"head":"3", "trule":"[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0.0 use_shell=1.0", "left":3, "right":5, "tails":[ "2" ], "f":{"logp":0.0, "use_shell":1.0}, "weight":2.71828182846 },
+{"head":"4", "trule":"[NP] ||| ein [NN,1] ||| a [1] ||| logp=0.0 use_a=1.0", "left":2, "right":5, "tails":[ "3" ], "f":{"logp":0.0, "use_a":1.0}, "weight":1.0 },
+{"head":"5", "trule":"[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0.0", "left":1, "right":5, "tails":[ "1","4" ], "f":{"logp":0.0}, "weight":1.0 },
+{"head":"6", "trule":"[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0.0", "left":0, "right":5, "tails":[ "0","5" ], "f":{"logp":0.0}, "weight":1.0 },
+{"head":"7", "trule":"[Goal] ||| [S,1] ||| [1] ||| ", "left":0, "right":5, "tails":[ "6" ], "f":{}, "weight":1.0 }
+]
+}
+