summaryrefslogtreecommitdiff
path: root/grammar.rb
blob: 3ee8105f61c738e9f2a433f9fc2bb3ee9f900d8a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
require 'nlp_ruby'


# Terminal symbol: wraps one word as it appears on the right-hand side of a
# grammar rule.
class T
  attr_accessor :word

  # word:: the value this terminal carries (stored as given)
  def initialize(word)
    self.word = word
  end

  # Renders the terminal as "T<word>".
  def to_s
    inner = "#{word}"
    "T<#{inner}>"
  end
end

# Non-terminal symbol: a category label paired with an index.
class NT
  attr_accessor :symbol, :index

  # symbol:: the category label of the non-terminal
  # index::  the index attached to this occurrence (defaults to 0)
  def initialize(symbol, index=0)
    self.symbol = symbol
    self.index = index
  end

  # Renders the non-terminal as "NT<symbol,index>".
  def to_s
    label = "#{symbol},#{index}"
    "NT<#{label}>"
  end
end



# One grammar production: a left-hand-side non-terminal (NT) rewriting to a
# sequence of terminals (T) and non-terminals (NT).
class Rule
  attr_accessor :lhs, :rhs

  # lhs:: the left-hand-side NT, or nil until it is parsed/assigned
  # rhs:: array of T / NT items forming the right-hand side
  def initialize(lhs=nil, rhs=[])
    @lhs = lhs
    @rhs = rhs
  end

  # Renders the rule as "LHS -> item item ... [arity=N]".
  def to_s
    "#{lhs} -> #{rhs.map(&:to_s).join(' ')} [arity=#{arity}]"
  end

  # Number of right-hand-side items that are not exactly of class T.
  def arity
    rhs.count { |item| item.class != T }
  end

  # Fills this rule from its textual form and appends the parsed items to the
  # current right-hand side.
  #
  # The string is cut into fields by +splitpipe+, which is not defined in this
  # file (presumably provided by nlp_ruby and splitting on "|||" -- verify).
  # Field 0 is the left-hand side, field 1 the whitespace-separated
  # right-hand side; any further fields are ignored here.
  #
  # s:: the rule string, e.g. "[X] ||| a [X,1] b ||| ..."
  def from_s(s)
    fields = splitpipe(s, 3)
    # Non-mutating gsub on purpose: gsub! returns nil when it replaces
    # nothing, which turned an unbracketed left-hand side into a nil symbol.
    @lhs = NT.new(fields[0].strip.gsub(/(\[|\])/, ''))
    # String#split without arguments already yields whitespace-free tokens.
    fields[1].split.each do |token|
      if token.start_with?('[') && token.end_with?(']')
        # Only the part before the first comma is kept as the symbol; the
        # index written in the token is discarded, so NT#index stays at its
        # default. NOTE(review): confirm that dropping the index is intended.
        @rhs << NT.new(token.gsub(/(\[|\])/, '').split(',')[0])
      else
        @rhs << T.new(token)
      end
    end
  end

  # Builds a new Rule from its textual form (see #from_s).
  def self.from_s(s)
    new.tap { |rule| rule.from_s(s) }
  end
end

# A list of Rule objects loaded from a file, with helpers that append glue
# and pass-through rules.
class Grammar
  attr_accessor :rules

  # Reads the lines of +fn+ through ReadFile.readlines_strip and parses each
  # one into a Rule with Rule.from_s.
  #
  # fn:: name of the grammar file to load
  def initialize(fn)
    @rules = []
    ReadFile.readlines_strip(fn).each do |line|
      @rules << Rule.from_s(line)
    end
  end

  # All rules rendered one per line, each line terminated by "\n".
  def to_s
    @rules.each_with_object(+'') do |rule, buffer|
      buffer << rule.to_s + "\n"
    end
  end

  # For every distinct left-hand-side symbol other than 'S', appends the two
  # rules "S -> <symbol>" and "S -> S X".
  #
  # NOTE(review): the second rule always uses 'X' and is appended once per
  # symbol, so it is repeated when several symbols exist -- confirm whether
  # that is intended.
  def add_glue_rules
    lhs_symbols = @rules.map { |rule| rule.lhs.symbol }
    non_start = lhs_symbols.reject { |sym| sym=='S' }.uniq
    # Iterating the derived array, so appending to @rules below is safe.
    non_start.each do |sym|
      unary = Rule.new(NT.new('S'), [NT.new(sym)])
      binary = Rule.new(NT.new('S'), [NT.new('S'), NT.new('X')])
      @rules << unary
      @rules << binary
    end
  end

  # Appends one rule "X -> <word>" for each element of +input+.
  #
  # input:: a collection whose elements respond to +word+
  def add_pass_through_rules(input)
    input.each do |terminal|
      passthrough = T.new(terminal.word)
      @rules << Rule.new(NT.new('X'), [passthrough])
    end
  end
end