summaryrefslogtreecommitdiff
path: root/overlapping_rules/merge_rules_local.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
committer Patrick Simianer <p@simianer.de> 2014-06-14 16:46:27 +0200
commit 26c490f404731d053a6205719b6246502c07b449 (patch)
tree 3aa721098f1251dfbf2249ecd2736434c13b1d48 /overlapping_rules/merge_rules_local.rb
init
Diffstat (limited to 'overlapping_rules/merge_rules_local.rb')
-rwxr-xr-x overlapping_rules/merge_rules_local.rb 230
1 files changed, 230 insertions, 0 deletions
diff --git a/overlapping_rules/merge_rules_local.rb b/overlapping_rules/merge_rules_local.rb
new file mode 100755
index 0000000..34bdfe5
--- /dev/null
+++ b/overlapping_rules/merge_rules_local.rb
@@ -0,0 +1,230 @@
+#!/usr/bin/env ruby
+
+# ISSUES
+# new rule already in global g?
+# R+S = R
+# variants?
+#
+
+
+# One grammar rule, parsed from a line of the form
+#   NT ||| source tokens ||| target tokens ||| name=value ... ||| alignment
+class Rule
+  attr_accessor :nt, :f, :e, :features, :alignment
+
+  # Parses the rule line +s+. Called without an argument it leaves every
+  # field unset so the caller can fill in the parts it needs.
+  #
+  # The feature and alignment fields may be missing; they default to an
+  # empty hash and an empty string. A line with fewer than three fields
+  # raises ArgumentError (it used to die with NoMethodError on nil).
+  def initialize(s=nil)
+    return if !s
+    nt, f, e, features, alignment = s.strip.split(' ||| ')
+    raise ArgumentError, "malformed rule: #{s.inspect}" unless e
+    @nt = nt.strip
+    @f = f.split
+    @e = e.split
+    @features = {}
+    (features || '').split.each { |token|
+      name, value = token.split('=')
+      # a token without '=' yields a nil value, which to_f turns into 0.0
+      @features[name] = value.to_f
+    }
+    @alignment = (alignment || '').strip
+  end
+
+  # Serialises the rule back into the five-field ' ||| ' format.
+  # Unset fields are rendered as empty strings.
+  def to_s
+    feature_string = (@features || {}).map { |name, value| "#{name}=#{value}" }.join(' ')
+    "#{@nt} ||| #{Array(@f).join(' ')} ||| #{Array(@e).join(' ')} ||| #{feature_string} ||| #{@alignment}"
+  end
+
+  # Key built from the source and target sides only: tokens joined with '_',
+  # the two sides separated by '|||'.
+  def rule_string
+    "#{Array(@f).join('_')}|||#{Array(@e).join('_')}"
+  end
+end
+
+# Pair of inclusive indices (from, to) describing a matched span.
+# NOTE(review): declared at top level without a superclass, so this reopens
+# Ruby's built-in Range; the to_s below therefore applies to every Range
+# object in the process. A dedicated class name would avoid that -- confirm
+# nothing relies on it before renaming.
+class Range
+  attr_accessor :from, :to
+
+  # Both ends start out unset.
+  def initialize
+    @from = @to = nil
+  end
+
+  # "from--to"; an unset end is rendered as an empty string.
+  def to_s
+    format('%s--%s', @from, @to)
+  end
+
+  # Mirrors the span around n: the new from is n - to and the new to is
+  # n - from. Returns the new to.
+  def correct(n)
+    @from, @to = n - @to, n - @from
+    @to
+  end
+end
+
+# True when the first and last token of both the source side (rule.f) and
+# the target side (rule.e) contain an "[X,<digit>]" placeholder; false
+# otherwise. The four tokens are tested in that order and testing stops at
+# the first one that does not match.
+def ignore(rule)
+  placeholder = '\[X,\d\]'
+  return false unless rule.f[0].match(placeholder)
+  return false unless rule.f[-1].match(placeholder)
+  return false unless rule.e[0].match(placeholder)
+  return false unless rule.e[-1].match(placeholder)
+  true
+end
+
+# Looks for the leading tokens of +second+ inside +first+.
+#
+# +first+ is walked token by token (for dir == 'right' both arrays are
+# reversed first) until an "[X,<digit>]" placeholder is reached or +second+
+# is exhausted. Each token is compared with the next unmatched token of
+# +second+, starting at second[0].
+#
+# Returns [first_range, second_range] -- the matched index spans, mapped back
+# to the original orientation for dir == 'right' -- when all four bounds are
+# set and at least one of them is greater than 0. Returns false otherwise.
+# A mismatch after the run has started clears the spans without restarting
+# the match, so such a call ends up returning false.
+def is_sub(first, second, dir='left')
+  first, second = first.reverse, second.reverse if dir == 'right'
+
+  span_a = Range.new
+  span_b = Range.new
+  cursor = 0        # index of the next token of second to compare
+  started = false   # set on the first hit and never cleared again
+  first.each_with_index do |token, pos|
+    break if token.match('\[X,\d\]') || cursor >= second.size
+    if token != second[cursor]
+      span_a.from = span_a.to = span_b.from = span_b.to = nil
+      next
+    end
+    unless started
+      span_a.from = pos
+      span_b.from = cursor
+      started = true
+    end
+    span_a.to = pos
+    span_b.to = cursor
+    cursor += 1
+  end
+
+  return false unless [span_a.from, span_a.to, span_b.from, span_b.to].all?
+
+  if dir == 'right'
+    # indices were taken on the reversed arrays; flip them back
+    span_a.correct(first.size - 1)
+    span_b.correct(second.size - 1)
+  end
+
+  # a match where every bound is 0 is not reported
+  return false unless [span_a.from, span_a.to, span_b.from, span_b.to].any? { |bound| bound > 0 }
+  return span_a, span_b
+end
+
+# Builds a new rule from a copy of s's source/target sides, extended with the
+# part of r that lies outside the matched span:
+#   dir == 'left'  -- the tokens of r before r_*_range.from are prepended
+#   dir == 'right' -- the tokens of r after  r_*_range.to   are appended
+# Any other dir returns the plain copy of s.
+#
+# Returns nil when r contributes nothing on either side, i.e. the match
+# starts at index 0 (left) or ends at the last index (right) for both f and
+# e. The right-hand check previously looked at range.from, so a multi-token
+# match ending at the last token produced a copy of s and not nil.
+#
+# s_f_range and s_e_range are accepted for symmetry but are not used.
+# Only the f and e fields of the returned Rule are set.
+def merge(r, s, r_f_range, s_f_range, r_e_range, s_e_range, dir)
+  new_rule = Rule.new
+  new_rule.f = s.f.dup
+  new_rule.e = s.e.dup
+  case dir
+  when 'left'
+    return nil if r_f_range.from==0&&r_e_range.from==0
+    new_rule.f = r.f.take(r_f_range.from) + new_rule.f
+    new_rule.e = r.e.take(r_e_range.from) + new_rule.e
+  when 'right'
+    return nil if r_f_range.to==r.f.size-1&&r_e_range.to==r.e.size-1
+    new_rule.f.concat(r.f.drop(r_f_range.to+1))
+    new_rule.e.concat(r.e.drop(r_e_range.to+1))
+  end
+  return new_rule
+end
+
+# Ad-hoc smoke test: runs is_sub in both argument orders over two groups of
+# hand-written token lists, prints the two returned values per call (groups
+# separated by two blank lines) and then terminates the process.
+def test
+  groups = [
+    [
+      [["der", "eurozone", "[X,1]", "die", "[X,2]"], ["eurozone"]],
+      [["the", "eurozone", "[X,1]", "[X,2]"], ["eurozone", "members"]] # ???
+    ],
+    [
+      [["schuld", "an", "[X,1]", "inflation"], ["schuld"]],
+      [["blamed", "for", "[X,1]", "inflation"], ["for", "responsible"]]
+    ]
+  ]
+
+  groups.each_with_index do |pairs, group_no|
+    2.times { puts } if group_no > 0
+    pairs.each do |left, right|
+      [[left, right], [right, left]].each do |one, other|
+        r, s = is_sub(one, other)
+        puts "#{r} #{s}"
+      end
+    end
+  end
+  exit
+end
+test if ARGV[0] == 'test'
+
+
+# read rules
+rules = []
+strings = {}   # rule_string => true for every rule read from STDIN
+if ARGV[0] != 'test'
+  while line = STDIN.gets
+    next if line.strip.empty?   # blank lines carry no rule
+    rules << Rule.new(line)
+    strings[rules[-1].rule_string] = true
+  end
+end
+
+# main
+# Every unordered pair of non-ignored rules is visited exactly once. The
+# earlier done[i] bookkeeping was only filled in after the inner loop had
+# finished, so it never skipped anything and each pair was handled twice.
+# For each pair, both merge directions are tried in both argument orders and
+# "X" is printed for every merged rule whose rule_string was not in the input.
+rules.each_with_index { |r,i|
+  next if ignore(r)
+  rules.each_with_index { |s,j|
+    next if j<=i||ignore(s)
+    # cheap pre-filter: the source sides must share at least one token
+    next if (r.f & s.f).empty?
+
+    [[r, s, 'left'], [s, r, 'left'], [r, s, 'right'], [s, r, 'right']].each { |a, b, dir|
+      range_f_first, range_f_second = is_sub(a.f, b.f, dir)
+      range_e_first, range_e_second = is_sub(a.e, b.e, dir)
+      next unless range_f_first&&range_f_second&&range_e_first&&range_e_second
+      #puts "A: #{a.f} ||| #{a.e}\nB: #{b.f} ||| #{b.e}"
+      #puts "f:(#{range_f_first} #{range_f_second}) e:(#{range_e_first} #{range_e_second})\n"
+      new_rule = merge(a, b, range_f_first, range_f_second, range_e_first, range_e_second, dir)
+      #puts "NEW #{new_rule}" if new_rule
+      puts "X" if (new_rule && !strings.has_key?(new_rule.rule_string))
+    }
+  }
+}
+