From 3677425d88fb8743055cf5056fa27f1123100eb4 Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Sat, 19 Sep 2015 16:24:18 +0200 Subject: misc --- .gitignore | 1 + derivation_to_json/deriv.rb | 151 ------------------------------- derivation_to_json/derivation_to_json.rb | 151 +++++++++++++++++++++++++++++++ derivation_to_json/example | 1 - derivation_to_json/example.json | 2 +- derivation_to_json/example.raw | 1 + 6 files changed, 154 insertions(+), 153 deletions(-) delete mode 100755 derivation_to_json/deriv.rb create mode 100755 derivation_to_json/derivation_to_json.rb delete mode 100644 derivation_to_json/example create mode 100644 derivation_to_json/example.raw diff --git a/.gitignore b/.gitignore index d306650..0d5cbd8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.out *.err out.* +derivation_editor/ util/run_beta_test diff --git a/derivation_to_json/deriv.rb b/derivation_to_json/deriv.rb deleted file mode 100755 index e6230c3..0000000 --- a/derivation_to_json/deriv.rb +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env ruby - -require 'zipf' - -def conv_cdec_show_deriv s - s.gsub! /\(/, "\n" - s.gsub! /[{}]/, "" - a = s.split "\n" - a.map! { |i| - i.strip.gsub /(\s*\))+$/, "" - } - a.reject! { |i| i.strip=="" } - return a -end - -class RuleAndSpan - attr_accessor :span, :symbol, :source, :target, :subspans, :done, :id - - def initialize s, id - spans, srcs, tgts = splitpipe s.strip - _,@symbol = spans.split - @span = _.split(",", 2).map { |i| i.gsub(/[<>]/, "").to_i} - @source = srcs.strip.split - j = 0 - @source.map! { |i| if i.match(/\[X\]/) then "[#{j+=1}]" else i end } - @target = tgts.strip.split - @subspans = [] - @done = false - @id = id - end - - def to_s - return "#{@id}\t<#{@span.first},#{@span[1]}> [X] ||| #{@source.join " "} ||| #{@target.join " "}" - end - - def is_terminal_rule? - @source.each { |w| - return true if !w.match(/\[S|(\d+)\]/) - } - - return false - end -end - -def proc_deriv s -a = conv_cdec_show_deriv s - -by_span = {} -spans = [] -id = 0 -a.each { |line| - rs = RuleAndSpan.new line, id - id += 1 - by_span[rs.span] = rs - if rs.is_terminal_rule? - spans << rs.span - end -} - -spans.reverse.each_with_index { |s,k| - (spans.size-(k+2)).downto(0) { |l| - t = spans[l] - if s[0] >= t[0] and s[1] <= t[1] - by_span[t].subspans << s - break - end - } -} - -# fix order -spans.each { |s| - by_span[s].subspans.reverse! -} - -def derive span, spans, by_span, o, groups, source - if groups.size==0 || groups.last.size>0 - groups << [] - end - a = nil - if source - a = span.source - else - a = span.target - end - a.each_with_index { |w,k| - nt = w.match /\[(\d+)\]/ - if nt - idx = nt.captures.first.to_i-1 - _ = derive by_span[span.subspans[idx]], spans, by_span, o, groups, source - (k+1).upto(a.size-1) { |i| - if !a[i].match(/\[(\d+)\]/) && groups.last.size>0 - groups << [] - end - } - else - groups.last << ["#{w}", span.id] - o << w - end - } - span.done = true -end - -so = [] -source_groups = [] -spans.each { |span| - next if by_span[span].done - derive by_span[span], spans, by_span, so, source_groups, true -} -#puts "SOURCE" -#puts so.join " " -#puts source_groups.to_s -#puts "##{source_groups.size}" - -spans.each { |s| by_span[s].done = false } - -o = [] -groups = [] -spans.each { |span| - next if by_span[span].done - derive by_span[span], spans, by_span, o, groups, false -} -#puts "TARGET" -#puts o.join " " -#puts groups.to_s -#puts "##{groups.size}" - -source_rgroups = [] -rgroups = [] -source_groups.each { |i| source_rgroups << i.first[1] } -groups.each { |i| rgroups << i.first[1] } - -phrase_align = [] -source_rgroups.each { |i| - phrase_align << [] - rgroups.each_with_index { |j,k| - if i==j - phrase_align.last << k - end - } -} - -h = {} -h[:phrase_alignment] = phrase_align -h[:source_groups] = source_groups.map { |a| a.map { |i| i.first } } -h[:target_groups] = groups.map { |a| a.map { |i| i.first } } - -return h.to_json -end - -puts proc_deriv STDIN.gets.strip - diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb new file mode 100755 index 0000000..a5ef2ef --- /dev/null +++ b/derivation_to_json/derivation_to_json.rb @@ -0,0 +1,151 @@ +#!/usr/bin/env ruby + +require 'zipf' + +def conv_cdec_show_deriv s + s.gsub! /\(/, "\n" + s.gsub! /[{}]/, "" + a = s.split "\n" + a.map! { |i| + i.strip.gsub /(\s*\))+$/, "" + } + a.reject! { |i| i.strip=="" } + return a +end + +class RuleAndSpan + attr_accessor :span, :symbol, :source, :target, :subspans, :done, :id + + def initialize s, id + spans, srcs, tgts = splitpipe s.strip + _,@symbol = spans.split + @span = _.split(",", 2).map { |i| i.gsub(/[<>]/, "").to_i} + @source = srcs.strip.split + j = 0 + @source.map! { |i| if i.match(/\[X\]/) then "[#{j+=1}]" else i end } + @target = tgts.strip.split + @subspans = [] + @done = false + @id = id + end + + def to_s + return "#{@id}\t<#{@span.first},#{@span[1]}> [X] ||| #{@source.join " "} ||| #{@target.join " "}" + end + + def is_terminal_rule? + @source.each { |w| + return true if !w.match(/\[S|(\d+)\]/) + } + + return false + end +end + +def proc_deriv s +a = conv_cdec_show_deriv s + +by_span = {} +spans = [] +id = 0 +a.each { |line| + rs = RuleAndSpan.new line, id + id += 1 + by_span[rs.span] = rs + if rs.is_terminal_rule? + spans << rs.span + end +} + +spans.reverse.each_with_index { |s,k| + (spans.size-(k+2)).downto(0) { |l| + t = spans[l] + if s[0] >= t[0] and s[1] <= t[1] + by_span[t].subspans << s + break + end + } +} + +# fix order +spans.each { |s| + by_span[s].subspans.reverse! +} + +def derive span, spans, by_span, o, groups, source + if groups.size==0 || groups.last.size>0 + groups << [] + end + a = nil + if source + a = span.source + else + a = span.target + end + a.each_with_index { |w,k| + nt = w.match /\[(\d+)\]/ + if nt + idx = nt.captures.first.to_i-1 + _ = derive by_span[span.subspans[idx]], spans, by_span, o, groups, source + (k+1).upto(a.size-1) { |i| + if !a[i].match(/\[(\d+)\]/) && groups.last.size>0 + groups << [] + end + } + else + groups.last << ["#{w}", span.id] + o << w + end + } + span.done = true +end + +so = [] +source_groups = [] +spans.each { |span| + next if by_span[span].done + derive by_span[span], spans, by_span, so, source_groups, true +} +#puts "SOURCE" +#puts so.join " " +#puts source_groups.to_s +#puts "##{source_groups.size}" + +spans.each { |s| by_span[s].done = false } + +o = [] +groups = [] +spans.each { |span| + next if by_span[span].done + derive by_span[span], spans, by_span, o, groups, false +} +#puts "TARGET" +#puts o.join " " +#puts groups.to_s +#puts "##{groups.size}" + +source_rgroups = [] +rgroups = [] +source_groups.each { |i| source_rgroups << i.first[1] } +groups.each { |i| rgroups << i.first[1] } + +phrase_align = [] +source_rgroups.each { |i| + phrase_align << [] + rgroups.each_with_index { |j,k| + if i==j + phrase_align.last << k + end + } +} + +h = {} +h[:phrase_alignment] = phrase_align +h[:source_groups] = source_groups.map { |a| a.map { |i| i.first }.join " " } +h[:target_groups] = groups.map { |a| a.map { |i| i.first }.join " " } + +return h.to_json +end + +puts proc_deriv STDIN.gets.strip + diff --git a/derivation_to_json/example b/derivation_to_json/example deleted file mode 100644 index 5128e09..0000000 --- a/derivation_to_json/example +++ /dev/null @@ -1 +0,0 @@ -({<0,41> [Goal] ||| [S] ||| [1]}({<0,41> [S] ||| [S] [X] ||| [1] [2]}({<0,37> [S] ||| [S] [X] ||| [1] [2]}({<0,33> [S] ||| [S] [X] ||| [1] [2]}({<0,32> [S] ||| [S] [X] ||| [1] [2]}({<0,30> [S] ||| [S] [X] ||| [1] [2]}({<0,15> [S] ||| [S] [X] ||| [1] [2]}({<0,10> [S] ||| [S] [X] ||| [1] [2]}({<0,9> [S] ||| [S] [X] ||| [1] [2]}({<0,8> [S] ||| [S] [X] ||| [1] [2]}({<0,7> [S] ||| [S] [X] ||| [1] [2]}({<0,5> [S] ||| [X] ||| [1]}({<0,5> [X] ||| hier also [X] , ||| so here [1] ,}({<2,4> [X] ||| ein bescheidener ||| a modest}) ) ) ({<5,7> [X] ||| auf alle ||| to all}) ) ({<7,8> [X] ||| demokratien ||| democracies}) ) ({<8,9> [X] ||| anzuwendender ||| anzuwendender}) ) ({<9,10> [X] ||| vorschlag ||| proposal}) ) ({<10,15> [X] ||| : [X] ideen ||| : [1] ideas}({<11,14> [X] ||| der markt für ||| the market for}) ) ) ({<15,30> [X] ||| funktioniert [X] , [X] der ||| works [1] [2] the}({<16,17> [X] ||| besser ||| better}) ({<18,29> [X] ||| wenn es den [X] ||| if [1]}({<21,29> [X] ||| [X] fällt , die [X] ||| [1] , the [2]}({<21,23> [X] ||| [X] leichter ||| [1] easier}({<21,22> [X] ||| bürgern ||| citizens}) ) ({<26,29> [X] ||| zielkonflikte zwischen [X] ||| trade @-@ offs between [1]}({<28,29> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ) ) ) ({<30,32> [X] ||| aussagen und ||| statements and}) ) ({<32,33> [X] ||| unterhaltung ||| entertainment}) ) ({<33,37> [X] ||| oder zwischen [X] und ||| or [1] and}({<35,36> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ({<37,41> [X] ||| [X] zu erkennen . ||| [1] .}({<37,38> [X] ||| parteitreue ||| parteitreue}) ) ) ) diff --git a/derivation_to_json/example.json b/derivation_to_json/example.json index b84edad..6bb2b19 100644 --- a/derivation_to_json/example.json +++ b/derivation_to_json/example.json @@ -1 +1 @@ -{"phrase_alignment":[[0,2],[1],[0,2],[3],[4],[5],[6],[7,9],[8],[7,9],[10,18],[11],[10,18],[12],[13],[14],[15],[16],[17],[10,18],[19],[20],[21,23],[22],[21,23],[24],[25]],"source_groups":[["hier","also"],["ein","bescheidener"],[","],["auf","alle"],["demokratien"],["anzuwendender"],["vorschlag"],[":"],["der","markt","für"],["ideen"],["funktioniert"],["besser"],[","],["wenn","es","den"],["bürgern"],["leichter"],["fällt",",","die"],["zielkonflikte","zwischen"],["treffsicherheit"],["der"],["aussagen","und"],["unterhaltung"],["oder","zwischen"],["treffsicherheit"],["und"],["parteitreue"],["zu","erkennen","."]],"target_groups":[["so","here"],["a","modest"],[","],["to","all"],["democracies"],["anzuwendender"],["proposal"],[":"],["the","market","for"],["ideas"],["works"],["better"],["if"],["citizens"],["easier"],[",","the"],["trade","@-@","offs","between"],["treffsicherheit"],["the"],["statements","and"],["entertainment"],["or"],["treffsicherheit"],["and"],["parteitreue"],["."]]} +{"phrase_alignment":[[0,2],[1],[0,2],[3],[4],[5],[6],[7,9],[8],[7,9],[10,18],[11],[10,18],[12],[13],[14],[15],[16],[17],[10,18],[19],[20],[21,23],[22],[21,23],[24],[25]],"source_groups":["hier also","ein bescheidener",",","auf alle","demokratien","anzuwendender","vorschlag",":","der markt für","ideen","funktioniert","besser",",","wenn es den","bürgern","leichter","fällt , die","zielkonflikte zwischen","treffsicherheit","der","aussagen und","unterhaltung","oder zwischen","treffsicherheit","und","parteitreue","zu erkennen ."],"target_groups":["so here","a modest",",","to all","democracies","anzuwendender","proposal",":","the market for","ideas","works","better","if","citizens","easier",", the","trade @-@ offs between","treffsicherheit","the","statements and","entertainment","or","treffsicherheit","and","parteitreue","."]} diff --git a/derivation_to_json/example.raw b/derivation_to_json/example.raw new file mode 100644 index 0000000..5128e09 --- /dev/null +++ b/derivation_to_json/example.raw @@ -0,0 +1 @@ +({<0,41> [Goal] ||| [S] ||| [1]}({<0,41> [S] ||| [S] [X] ||| [1] [2]}({<0,37> [S] ||| [S] [X] ||| [1] [2]}({<0,33> [S] ||| [S] [X] ||| [1] [2]}({<0,32> [S] ||| [S] [X] ||| [1] [2]}({<0,30> [S] ||| [S] [X] ||| [1] [2]}({<0,15> [S] ||| [S] [X] ||| [1] [2]}({<0,10> [S] ||| [S] [X] ||| [1] [2]}({<0,9> [S] ||| [S] [X] ||| [1] [2]}({<0,8> [S] ||| [S] [X] ||| [1] [2]}({<0,7> [S] ||| [S] [X] ||| [1] [2]}({<0,5> [S] ||| [X] ||| [1]}({<0,5> [X] ||| hier also [X] , ||| so here [1] ,}({<2,4> [X] ||| ein bescheidener ||| a modest}) ) ) ({<5,7> [X] ||| auf alle ||| to all}) ) ({<7,8> [X] ||| demokratien ||| democracies}) ) ({<8,9> [X] ||| anzuwendender ||| anzuwendender}) ) ({<9,10> [X] ||| vorschlag ||| proposal}) ) ({<10,15> [X] ||| : [X] ideen ||| : [1] ideas}({<11,14> [X] ||| der markt für ||| the market for}) ) ) ({<15,30> [X] ||| funktioniert [X] , [X] der ||| works [1] [2] the}({<16,17> [X] ||| besser ||| better}) ({<18,29> [X] ||| wenn es den [X] ||| if [1]}({<21,29> [X] ||| [X] fällt , die [X] ||| [1] , the [2]}({<21,23> [X] ||| [X] leichter ||| [1] easier}({<21,22> [X] ||| bürgern ||| citizens}) ) ({<26,29> [X] ||| zielkonflikte zwischen [X] ||| trade @-@ offs between [1]}({<28,29> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ) ) ) ({<30,32> [X] ||| aussagen und ||| statements and}) ) ({<32,33> [X] ||| unterhaltung ||| entertainment}) ) ({<33,37> [X] ||| oder zwischen [X] und ||| or [1] and}({<35,36> [X] ||| treffsicherheit ||| treffsicherheit}) ) ) ({<37,41> [X] ||| [X] zu erkennen . ||| [1] .}({<37,38> [X] ||| parteitreue ||| parteitreue}) ) ) ) -- cgit v1.2.3