From 7e667e541af1532df36ac02c9a32f6da112edbc1 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Wed, 25 Nov 2015 14:50:24 +0100
Subject: lots of stuff, just for backup

---
 derivation_to_json/derivation_to_json.rb | 30 ++++++++++++++++++--------
 derivation_to_json/rec.rb                | 37 ++++++++++++++++++++++++++++----
 2 files changed, 54 insertions(+), 13 deletions(-)

(limited to 'derivation_to_json')

diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb
index 3a4eb65..b14b0b5 100755
--- a/derivation_to_json/derivation_to_json.rb
+++ b/derivation_to_json/derivation_to_json.rb
@@ -42,9 +42,10 @@ class RuleAndSpan
 end
 
 class Rule
-  attr_accessor :nt, :f, :e, :v, :a, :ha, :source_groups, :target_groups
+  attr_accessor :nt, :f, :e, :v, :a, :ha, :source_groups, :target_groups, :raw_rule_str
 
   def initialize s
+    @raw_rule_str = s.strip
     splitpipe(s).each_with_index { |i,j|
       i = i.strip.lstrip
       if j == 0 # NT
@@ -115,7 +116,8 @@ class Rule
   end
 
   def to_s
-    "#{@nt} ||| #{@f} ||| #{@e} ||| #{@v} ||| #{@a}\n"
+    #"#{@nt} ||| #{@f} ||| #{@e} ||| #{@v} ||| #{@a}\n"
+    "#{raw_rule_str}"
   end
 end
 
@@ -138,7 +140,7 @@ def conv_cdec_show_deriv s
   return a, rules
 end
 
-def derive span, spans, by_span, o, groups, source
+def derive span, by_span, o, groups, source
   if groups.size==0 || groups.last.size>0
     groups << []
   end
@@ -152,7 +154,7 @@ def derive span, spans, by_span, o, groups, source
     nt = w.match /\[(\d+)\]/
     if nt
       idx = nt.captures.first.to_i-1
-      _ = derive by_span[span.subspans[idx]], spans, by_span, o, groups, source
+      _ = derive by_span[span.subspans[idx]], by_span, o, groups, source
       (k+1).upto(a.size-1) { |i|
         if !a[i].match(/\[(\d+)\]/) && groups.last.size>0
           groups << []
@@ -205,7 +207,7 @@ def proc_deriv s
   source_groups = []
   spans.each { |span|
     next if by_span[span].done
-    derive by_span[span], spans, by_span, so, source_groups, true
+    derive by_span[span], by_span, so, source_groups, true
   }
 
   spans.each { |s| by_span[s].done = false }
@@ -214,7 +216,7 @@ def proc_deriv s
   groups = []
   spans.each { |span|
     next if by_span[span].done
-    derive by_span[span], spans, by_span, o, groups, false
+    derive by_span[span], by_span, o, groups, false
   }
 
   source_rgroups = []
@@ -226,6 +228,7 @@ def proc_deriv s
     rules_by_span_id[i.first[1]] = i.first[2]
   }
 
+  # make/fake phrase alignment
   phrase_align = []
   count_source = {}
   count_target = {}
@@ -256,12 +259,9 @@ def proc_deriv s
           end
         }
       }
-      puts add_to.to_s
-      puts phrase_align.to_s
       add_to.each { |k|
         phrase_align[k] << j
       }
-      puts phrase_align.to_s
     end
   }
 
@@ -281,6 +281,15 @@ def proc_deriv s
     }
   }
 
+  # span info
+  span_info = {}
+  span2id = {}
+  by_span.each { |k,v|
+    span_info[v.id] = [k, v.subspans]
+    span2id[k] = v.id
+  }
+
+  # final object
   h = {}
   h[:phrase_alignment] =  phrase_align
   h[:source_rgroups] = source_rgroups
@@ -288,6 +297,8 @@ def proc_deriv s
   h[:rules_by_span_id] = rules_by_span_id
   h[:source_groups] = source_groups.map { |a| a.map { |i| i.first }.join " " }
   h[:target_groups] = groups.map { |a| a.map { |i| i.first }.join " " }
+  h[:span_info] = span_info
+  h[:span2id] = span2id
 
   return h.to_json
 end
@@ -300,6 +311,7 @@ if __FILE__ == $0
   json = proc_deriv(s)
   obj = JSON.parse(json)
   STDERR.write "#{json}\n"
+  puts obj["source_groups"].join " "
   puts obj["target_groups"].join " "
 end
 
diff --git a/derivation_to_json/rec.rb b/derivation_to_json/rec.rb
index 677a02a..84bdc0d 100755
--- a/derivation_to_json/rec.rb
+++ b/derivation_to_json/rec.rb
@@ -4,8 +4,8 @@ require 'json'
 require 'zipf'
 
 
-before = JSON.parse(ReadFile.read('x.json'))
-after = JSON.parse(ReadFile.read('y.json'))
+before = JSON.parse(ReadFile.read('in7.json'))
+after = JSON.parse(ReadFile.read('out7.json'))
 
 alignment = {}
 after["align"].each { |i|
@@ -29,13 +29,26 @@ before['source_rgroups'].uniq.each { |k|
   } 
 }
 
-srg2idx.each_pair { |k,v|
+def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false
   a = []
   tgt = []
+  target_phrases = [] # alignment seen from target
   v.each { |i|
     a << after["source"][i]
-    tgt << after["target"][alignment[i].first]
+    target_phrases << alignment[i].first if alignment[i]
+  }
+  target_phrases.sort! if !dontsort
+  target_phrases.each { |j|
+    tgt << after["target"][j]
   }
+
+  return a, tgt, target_phrases
+end
+
+
+# k is a span id: it keys before['rules_by_span_id'] and before['span_info']
+srg2idx.each_pair { |k,v|
+  a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v
   rule_before = before['rules_by_span_id'][k.to_s]
   src_side_before = splitpipe(rule_before)[1]
   x = src_side_before.split
@@ -44,6 +57,22 @@ srg2idx.each_pair { |k,v|
   puts rule_before
   puts "#{k} #{a.join " [X] "}"
   puts tgt.to_s
+  puts before["span_info"][k.to_s].to_s
+  puts "target phrases #{target_phrases}"
+  s = ""
+  target_phrases.uniq.each { |j| s += after["target"][j]+" " }
+  puts "S: #{s}"
+  puts "nothing to do" if before["span_info"][k.to_s][1].size==0
+  target_phrase_sub = []
+  before["span_info"][k.to_s][1].each { |subspan|
+    puts subspan.to_s
+    subid = before["span2id"][subspan.to_s]
+    puts "subid #{subid}"
+    puts "XXX #{srg2idx[subid]}"
+    _, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true
+    target_phrase_sub << tp
+  }
+  puts "targ ph sub #{target_phrase_sub.to_s}"
   puts "---"
   puts
 }
-- 
cgit v1.2.3