diff options
| -rw-r--r-- | help.inc.php | 2 | ||||
| -rw-r--r-- | interface.php | 4 | ||||
| -rw-r--r-- | js/debug.js | 4 | ||||
| -rw-r--r-- | js/lfpe.js | 1 | ||||
| -rw-r--r-- | phrase_alignment/example.txt | 3 | ||||
| -rwxr-xr-x | phrase_alignment/phrase_alignment.rb | 735 | ||||
| -rwxr-xr-x | server.rb | 40 | ||||
| -rw-r--r-- | views/debug.haml | 2 | 
8 files changed, 786 insertions, 5 deletions
| diff --git a/help.inc.php b/help.inc.php index 5132620..e571a6d 100644 --- a/help.inc.php +++ b/help.inc.php @@ -1,4 +1,4 @@ -<p style="margin-top:-1em">Press the 'Next' button to <strong>submit</strong> your post-edit and to request the next segment for post-edition. +<p>Press the 'Next' button to <strong>submit</strong> your post-edit and to request the next segment for post-edition.  Alternatively, in the textual interface, you may just press return when you finished the post-edit ('Target' text area is in focus).</p>  <p>The session can be paused at any time and continued later; However, if you have to pause your session, wait until the activity notification disappears and then press 'Pause', as we are collecting timing information. You may also just reload this site and re-request the segment to reset the timer.</p> diff --git a/interface.php b/interface.php index d8b7a72..dd08b10 100644 --- a/interface.php +++ b/interface.php @@ -91,8 +91,8 @@ foreach($db->raw_source_segments as $s) {  <!-- /Session overview -->  <!-- Help --> -<button id="help_button" onclick="$('#help').toggle('blind')">Help</button> -<div id="help" style="display:none"> +<button id="help_button" class="button" onclick="$('#help').toggle('blind')">Help</button> +<div id="help" style="display:none;margin-left:1em">  <?php include("help.inc.php"); ?>  <p class="xtrasmall">Support: <a href="mailto://simianer@cl.uni-heidelberg.de">Mail</a></p>  <p class="xtrasmall">Session: #<?php echo $_GET["key"]; ?> | <a href="http://coltrane.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a></p> diff --git a/js/debug.js b/js/debug.js index 76c7b80..633a5c4 100644 --- a/js/debug.js +++ b/js/debug.js @@ -7,5 +7,9 @@ $().ready(function()        }});         })    }) +   +  var d = atob(document.getElementById("svg_b64").innerHTML);  +  $('#svg').append($('<svg width="10000px">'+d+'</svg>')); +  }) @@ -320,6 +320,7 @@ function Next()        $("#next").html("Next");        $("#oov_tgt0").focus();        not_working(false); +      DE_locked = true;      // translation mode      } else { diff --git a/phrase_alignment/example.txt b/phrase_alignment/example.txt new file mode 100644 index 0000000..89bca35 --- /dev/null +++ b/phrase_alignment/example.txt @@ -0,0 +1,3 @@ +a b c d +w x y z +0-1 1-0 2-2 3-3 diff --git a/phrase_alignment/phrase_alignment.rb b/phrase_alignment/phrase_alignment.rb new file mode 100755 index 0000000..9d29798 --- /dev/null +++ b/phrase_alignment/phrase_alignment.rb @@ -0,0 +1,735 @@ +#!/usr/bin/env ruby + +require 'zipf' + + +module PhrasePhraseExtraction + +DEBUG=true +MAX_NT=2 # chiang:2 +MAX_SEED_NUM_WORDS=3 # chiang:10 words phrases! +MAX_SRC_SZ=3 # chiang:5 words phrases! +FORBID_SRC_ADJACENT_SRC_NT=true # chiang:true + +class Rule +  attr_accessor :source, :target, :arity, :source_context, :target_context, :alignment + +  def initialize source_range=nil, target_range=nil, source_context=nil, target_context=nil, alignment=[] +    if source_context && target_range && source_context && target_context +      @source = [source_range] +      @target = [target_range] +      @source_context = source_context +      @target_context = target_context +      @alignment = alignment +    else +      @source = [] +      @target = [] +      @source_context = [] +      @target_context = [] +      @alignment = [] +    end +    @arity = 0 +  end + +  def <=> other_rule +  end + +  def hash +    self.as_trule_string.hash +  end + +  def eql? other +    self.as_trule_string == other.as_trule_string +  end + +  def len_src +    src_len = 0 +    @source.each { |i| +      if i.is_a? String +        src_len += 1 +      else +        src_len += i.last-i.first+1 +      end +    } + +    return src_len +  end + +  def len_tgt +    tgt_len = 0 +    @target.each { |i| +      if i.is_a? String +        tgt_len += 1 +      else +        tgt_len += i.last-i.first+1 +      end +    } + +    return tgt_len +  end + +  def len +    src_len = 0 +    @source.each { |i| +      if i.is_a? String +        src_len += 1 +      else +        src_len += i.last-i.first+1 +      end +    } +    tgt_len = 0 +    @target.each { |i| +      if i.is_a? String +        tgt_len += 1 +      else +        tgt_len += i.last-i.first+1 +      end +    } +    return [src_len, tgt_len] +  end + +  def to_s +    source_string = "" +    @source.each { |i| +      if i.is_a? Range +        source_string += @source_context[i].to_s +      else +        source_string += " #{i} " +      end +    } +    target_string = "" +    @target.each { |i| +      if i.is_a? Range +        target_string += @target_context[i].to_s +      else +        target_string += " #{i} " +      end +    } + +    astr = "" +    @alignment.each { |p| +      astr += " #{p.first}-#{p.last}" +    } +    astr.strip! + +    return "#{source_string.gsub(/\s+/, " ").strip} -> #{target_string.gsub(/\s+/, " ").strip} | #{astr}" +  end + +  def base_alignment +    min_src = @alignment.map{|p| p.first }.min +    min_tgt = @alignment.map{|p| p.last }.min +    @alignment.each_with_index { |p,j| +      @alignment[j] = [p.first-min_src, p.last-min_tgt] +    } +  end + +  def base_alignment2 correct_src, correct_tgt, start_source, start_target +    @alignment.each_with_index { |p,j| +      if p[0] > start_source  +        @alignment[j][0] = [0,p.first-correct_src].max +      end +      if p[1] > start_target +        @alignment[j][1] = [0,p.last-correct_tgt].max +      end +    } +  end + +  def as_trule_string +    source_string = "" +    @source.each { |i| +      if i.is_a? Range +        source_string += @source_context[i].join(" ").strip +      else +        source_string += " #{i} " +      end +    } +    target_string = "" +    @target.each { |i| +      if i.is_a? Range +        target_string += @target_context[i].join(" ").strip +      else +        target_string += " #{i} " +      end +    } +    source_string = source_string.lstrip.strip +    target_string = target_string.lstrip.strip + +    astr = "" +    @alignment.each { |p| +      astr += " #{p.first}-#{p.last}" +    } +    astr.strip! + +    #source_string.gsub!(/\[X,\d+\]/, "[X]") +    return "[X] ||| #{source_string} ||| #{target_string} ||| NewRule=1 ||| #{astr}" +  end + +  def is_terminal? +    #return false if @source.size>1 +    #return false if @target.size>1 +    @source.each { |i| return false if !i.is_a? Range } +    @target.each { |i| return false if !i.is_a? Range } +    return true +  end + +  # check if other_rule is a part of self +  def mergeable_with? other_rule +    return false if !other_rule.is_terminal? +    other_source_begin = other_rule.source.first.first +    other_source_end   = other_rule.source.first.last +    other_target_begin = other_rule.target.first.first +    other_target_end   = other_rule.target.first.last +    b = false +    @source.each { |i| +      next if !i.is_a? Range +      if (   other_source_begin >= i.first \ +          && other_source_end   <= i.last  \ +          && (!(other_source_begin==i.first && other_source_end==i.last))) +        b = true +        break +      end +    } +    return false if !b +    @target.each { |i| +      next if !i.is_a? Range +      if (   other_target_begin >= i.first \ +          && other_target_end   <= i.last  \ +          && (!(other_target_begin==i.first && other_target_end==i.last))) +        b = true +        break +      end +    } + +    return b +  end + +  def self.split a, b, index=0, p="target" +    return "[NEWX,#{index}]"if (a==b) + +    aa = a.to_a +    begin_split = b.first +    end_split   = b.last + +    p1 = aa[0..aa.index([begin_split-1,aa.first].max)] +    p2 = aa[aa.index([end_split+1, aa.last].min)..aa.last] + +    nt = "[NEWX,#{index}]" + +    ret = nil +    if begin_split > a.first && end_split < a.last +      ret = [(p1.first..p1.last), nt, (p2.first..p2.last)] +    elsif begin_split == a.first +      ret = [nt, (p2.first..p2.last)] +    elsif end_split == a.last +      ret = [(p1.first..p1.last), nt] +    end + +    return ret +  end + +  def self.merge r, s +    return nil if !r.mergeable_with? s +    return nil if !s.is_terminal? + +    other_source_begin = s.source.first.first +    other_source_end   = s.source.first.last +    other_target_begin = s.target.first.first +    other_target_end   = s.target.first.last + +    new_rule = Rule.new +    new_rule.source_context = r.source_context +    new_rule.target_context = r.target_context +    new_rule.arity = r.arity+1 +    new_rule.alignment = Array.new +    r.alignment.each { |p| new_rule.alignment << Array.new(p) } # deep copy + +    c = new_rule.arity +    done = false +    correct_src = 0 +    r.source.each_with_index { |i,j| +      if i.is_a? Range +        if (   !done \ +            && other_source_begin >= i.first \ +            && other_source_end   <= i.last) +          new_rule.source << Rule.split(i, (other_source_begin..other_source_end), c, "source") +          new_rule.source.flatten! +          done = true +        else +          new_rule.source << i +        end +      else +        new_rule.source << i +      end +    } +    # relabel Xs (linear) +    switch = false +    k = 1 +    new_rule.source.each_with_index { |i,j| +      if i.is_a? String +        m = i.match(/\[(X|NEWX),(\d+)\]/) +        n = m[1] +        l = m[2].to_i +        if k != l +          switch = true +        end +        new_rule.source[j] = "[#{n},#{k}]" +        k += 1 +      end +    } +    puts "switch #{switch}" if DEBUG +    done = false +    correct_tgt = 0 +    r.target.each_with_index { |i,j| +      if i.is_a? Range +        if (   !done \ +            && other_target_begin >= i.first \ +            && other_target_end   <= i.last) +          new_rule.target << Rule.split(i, (other_target_begin..other_target_end), c) +          new_rule.target.flatten! +          done = true +        else +          new_rule.target << i +        end +      else +        new_rule.target << i +        reorder = true +      end +    } + +    correct_src = r.len_src-new_rule.len_src +    correct_tgt = r.len_tgt-new_rule.len_tgt +    puts "correct_src #{correct_src}" +    puts "correct_tgt #{correct_tgt}" + +    start_correct_source = nil +    j = 0 +    fl = [] +    new_rule.source.each { |i| +      if i.is_a? Range +        fl << new_rule.source_context[i] +      else +        if i.match(/\[NEWX,\d+\]/) +          puts "j = #{j}" +          start_correct_source = j +        end +        fl << i +      end +      j += 1 +    } +    fl.flatten! + +    start_correct_target = nil +    j = 0 +    fl.each { |i| +      if i.match(/\[NEWX,\d+\]/) +        puts "j = #{j}" +        start_correct_source = j +        break +      end +      j += 1 +    } +     +    el = [] +    new_rule.target.each { |i| +      if i.is_a? Range +        el << new_rule.target_context[i] +      else +        el << i +      end +      j += 1 +    } +    el.flatten! + +    start_correct_target = nil +    j = 0 +    el.each { |i| +      if i.match(/\[NEWX,\d+\]/) +        puts "j = #{j}" +        start_correct_target = j +        break +      end +      j += 1 +    } + +    puts "start_correct_source = #{start_correct_source}" +    puts "start_correct_target = #{start_correct_target}" + +    new_rule.base_alignment2 correct_src, correct_tgt, start_correct_source, start_correct_target +    puts "not uniq #{new_rule.alignment.to_s}" +    new_rule.alignment.uniq! + +    puts "a before: #{new_rule.alignment.to_s}" +    puts fl.to_s +    new_rule.alignment.reject! { |p| +      !fl[p.first] || !el[p.last] || fl[p.first].match(/\[(NEWX|X),\d+\]/) || el[p.last].match(/\[(NEWX|X),\d+\]/) +    } +    puts "a after: #{new_rule.alignment.to_s}" +    puts "old len_src #{r.len_src}" +    puts "new len_src #{new_rule.len_src}" +    puts "old len_tgt #{r.len_tgt}" +    puts "new len_tgt #{new_rule.len_tgt}" + +    if switch +      new_rule.target.each_with_index { |i,j| +        if i.is_a? String +          m = i.match(/\[(X|NEWX),(\d+)\]/) +          n = m[1] +          k = m[2].to_i +          l = nil +          if k == 1 +            l = 2 +          else # 2 +            l = 1 +          end +          new_rule.target[j] = "[#{n},#{l}]" +        end +      } +    end + +    new_rule.source.each_with_index { |i,j| +      if i.is_a?(String) && i.match(/\[NEWX,\d\]/) +        i.gsub!(/NEWX/, "X") +      end +    } +    new_rule.target.each_with_index { |i,j| +      if i.is_a?(String) && i.match(/\[NEWX,\d\]/) +        i.gsub!(/NEWX/, "X") +      end +    } + +    return new_rule +  end + +  def expand_fake_alignment +    new_alignment = [] +    if DEBUG +      puts @alignment.to_s +      puts @source.to_s +      puts @target.to_s +    end +    fl = @source.map { |i| +      if i.is_a? Range +        @source_context[i].map{|x|x.split} +      else +        i +      end +    }.flatten 1 +    el = @target.map { |i| +      if i.is_a? Range +        @target_context[i].map{|x|x.split} +      else +        i +      end +    }.flatten 1 +    if DEBUG +      puts fl.to_s +      puts el.to_s +      puts "->" +    end + +    offsets_src = {} +    #offsets_src.default = 0 +    o = 0 +    fl.each_with_index { |i,j| +      if i.is_a? Array +        o += i.size-1 +      end +      offsets_src[j] = o +    } +    offsets_tgt = {} +    #offsets_tgt.default = 0 +    o = 0 +    el.each_with_index { |i,j| +      if i.is_a? Array +        o += i.size-1 +      end +      offsets_tgt[j] = o +    } + +    @alignment.each { |p| +      if DEBUG +        puts p.to_s +        puts "#{offsets_src[p.first]} -- #{offsets_tgt[p.last]}" +      end +      new_alignment << [ p.first+offsets_src[p.first], p.last+offsets_tgt[p.last] ] +      if DEBUG +        puts new_alignment.last.to_s +        puts "---" +        puts +      end +    } +    @alignment = new_alignment +  end + +end + +def PhrasePhraseExtraction.has_alignment a, i, dir="src" +  index = 0 +  index = 1 if dir=="tgt" +  a.each { |p| +    return true if p[index]==i +  } +  return false +end + +def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, elen +  a.each { |p| +    fi=p[0]; ei=p[1] +    if (fstart..fend).include? fi +      if ei<estart || ei>eend +        return [] +      end +    end +    if (estart..eend).include? ei +      if fi<fstart || fi>fend +        return [] +      end +    end + +  } +  rules = [] +  fs = fstart +  loop do +    fe = fend +    loop do +      rules << Rule.new(fs..fe, estart..eend, f, e) +      a.each { |p| +        if (fs..fe).include?(p.first) +          rules.last.alignment << p +        end +      } +      rules.last.base_alignment +      fe += 1 +      break if has_alignment(a, fe, "tgt")||fe>=elen +    end +    fs -= 1 +    break has_alignment(a, fs, "src")||fs<0 +  end + +  return rules +end + +def PhrasePhraseExtraction.make_gappy_rules rules, seed_rules +  MAX_NT.times { +    new_rules = [] +    rules.each { |r| +      seed_rules.each { |s| +        if r.mergeable_with? s +          new = Rule.merge r, s +          new_rules << new +          puts "#{r.to_s} <<< #{s.to_s}" if DEBUG +          puts " = #{new.to_s}\n\n" if DEBUG +        end +      } +    } +    rules += new_rules +  } + +  return rules +end + +def PhrasePhraseExtraction.make_seed_rules a, e, f +  rules = [] +  (0..e.size-1).each { |estart| +  (estart..e.size-1).each { |eend| + +    fstart = f.size-1 +    fend   = 0 +    a.each { |p| +      fi=p[0]; ei=p[1] +      if estart<=ei && ei<=eend +        fstart = [fi, fstart].min +        fend   = [fi, fend].max +      end +    } +    next if fstart>fend +    puts "fstart #{fstart}, fend #{fend}, estart #{estart}, eend #{eend}" if DEBUG +    new_rules = extract fstart, fend, estart, eend, f, e, a, f.size, e.size +    new_rules.each { |r| +      puts r.to_s if DEBUG +    } +    rules += new_rules +  } +  } + +  return rules +end + +def PhrasePhraseExtraction.test +  # 0 1 2 3 +  # a b c d +  # w x y z +  # 0-0 +  # 1-3 +  # 2-2 +  # 3-1 +  ra = Rule.new +  rb = Rule.new +  ra.source = [(0..2), "[X,1]"] +  ra.target = [(0..0), "[X,1]", (2..3)] +  ra.source_context = ["a", "b", "c", "d"] +  ra.target_context = ["w", "x", "y", "z"] +  ra.alignment = [[0,0],[1,3],[2,2]] +  ra.arity = 1 +  rb.source = [(1..1)] +  rb.target = [(3..3)] +  rb.source_context = ["a", "b", "c", "d"] +  rb.target_context = ["w", "x", "y", "z"] +  rb.alignment = [[0,0]] +  rb.arity = 0 + +  puts ra.mergeable_with? rb +  nr = Rule.merge ra, rb +  puts ra.to_s +  puts rb.to_s +  puts nr.to_s +end + +def PhrasePhraseExtraction.test_phrase +  ra = Rule.new +  rb = Rule.new +  ra.source = [(0..2), "[X,1]"] +  ra.target = [(0..0), "[X,1]", (2..3)] +  ra.source_context = ["a a", "b b", "c c", "d d"] +  ra.target_context = ["w w", "x x", "y y", "z z"] +  ra.alignment = [[0,0],[1,3],[2,2]] +  #ra.expand_fake_alignment +  ra.arity = 1 +  rb.source = [(1..1)] +  rb.target = [(3..3)] +  rb.source_context = ra.source_context +  rb.target_context = rb.source_context +  rb.alignment = [[0,0]] +  #rb.expand_fake_alignment +  rb.arity = 0 + +  puts ra.mergeable_with? rb +  nr = Rule.merge ra, rb +  puts ra.to_s +  puts rb.to_s +  nr.expand_fake_alignment +  puts nr.to_s +end + +def PhrasePhraseExtraction.test_phrase2 +  source_context = ["a", "b", "c", "Blechbänder", ", besteht", "der Spreizdorn im wesentlichen", "aus", "x"] +  target_context = ["w", "x", "y", "the expansion", "mandrel consists", "essentially of expansion mandrel", "z"] + +  ra = Rule.new +  ra.source = ["[X,1]", (3..6)] +  ra.target = ["[X,1]", (3..5)] +  ra.source_context = source_context +  ra.target_context = target_context +  ra.alignment = [[1,1],[2,2],[3,3],[4,2]] +  ra.arity = 1 + +  rb = Rule.new +  rb.source = [(4..6)] +  rb.target = [(4..5)] +  rb.source_context = source_context +  rb.target_context = target_context +  rb.alignment = [[0,0],[1,1],[2,0]] +  rb.arity = 0 + +  puts ra.mergeable_with? rb +  nr = Rule.merge ra, rb +  puts ra.to_s +  puts rb.to_s +  nr.expand_fake_alignment +  puts nr.to_s +end + +def PhrasePhraseExtraction.extract_rules f, e, as, expand=false +  a = [] +  as.each { |p| +    x,y = p.split "-" +    x = x.to_i; y = y.to_i +    a << [x,y] +  } +  rules = PhrasePhraseExtraction.make_seed_rules a, e,f +  seed_rules = PhrasePhraseExtraction.remove_too_large_seed_phrases rules +  rules = PhrasePhraseExtraction.make_gappy_rules rules, seed_rules + +  if PhrasePhraseExtraction::FORBID_SRC_ADJACENT_SRC_NT +    rules = PhrasePhraseExtraction.remove_adj_nt rules +  end + +  rules = PhrasePhraseExtraction.remove_too_long_src_sides rules + +  if expand +    rules.each { |r| r.expand_fake_alignment } +  end + +  return rules.uniq +end + +def PhrasePhraseExtraction.remove_too_large_seed_phrases rules +  return rules.reject { |r| +    src_len, tgt_len = r.len +    src_len>PhrasePhraseExtraction::MAX_SEED_NUM_WORDS \ +    || tgt_len>PhrasePhraseExtraction::MAX_SEED_NUM_WORDS } +end + +def PhrasePhraseExtraction.remove_adj_nt rules +  return rules.reject { |r| +    b = false +    prev = false +    r.source.each { |i| +      if i.is_a? String +        if prev +          b = true +          break +        end +        prev = true +      else +        prev = false +      end +    } +    b +  } +end + +def PhrasePhraseExtraction.remove_too_long_src_sides rules +  return rules.reject { |r| +    r.len.first > PhrasePhraseExtraction::MAX_SRC_SZ +  } +end + +end # module + +def main +  file = ReadFile.new ARGV[0] + +  f = file.gets.split +  e = file.gets.split +  a = [] +  file.gets.split.each { |p| +    x,y = p.split "-" +    x = x.to_i; y = y.to_i +    a << [x,y] +  } +  rules = PhrasePhraseExtraction.make_seed_rules a, e, f +  seed_rules = PhrasePhraseExtraction.remove_too_large_seed_phrases rules +  rules = PhrasePhraseExtraction.make_gappy_rules rules, seed_rules + +  if PhrasePhraseExtraction::FORBID_SRC_ADJACENT_SRC_NT +    rules = PhrasePhraseExtraction.remove_adj_nt rules +  end + +  rules = PhrasePhraseExtraction.remove_too_long_src_sides rules + +  rules.uniq! + +  rules.each { |r| +    puts r.as_trule_string +  } +end +#main + +def test +  #PhrasePhraseExtraction.test +  #PhrasePhraseExtraction.test_phrase +  PhrasePhraseExtraction.test_phrase2 +end +test + @@ -7,7 +7,9 @@ require 'nanomsg'  require 'zipf'  require 'json'  require 'haml' +require 'uri'  require_relative './derivation_to_json/derivation_to_json' +require_relative './phrase_alignment/phrase_alignment'  # #############################################################################  # Load configuration file and setup global variables @@ -28,6 +30,7 @@ end  DIR="/fast_scratch/simianer/lfpe"  $daemons = {    :tokenizer        => "#{DIR}/lfpe/util/wrapper.rb -a tokenize   -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}", +  :tokenizer_src    => "#{DIR}/lfpe/util/wrapper.rb -a tokenize   -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",    :detokenizer      => "#{DIR}/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",    :detokenizer_src  => "#{DIR}/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",    :truecaser        => "#{DIR}/lfpe/util/wrapper.rb -a truecase   -S '__ADDR__' -e #{EXTERNAL} -t #{SESSION_DIR}/truecase.model", @@ -142,7 +145,9 @@ end  post '/next' do      # (receive post-edit, update models), send next translation    cross_origin -  data = JSON.parse(request.body.read) +  s = request.body.read +  logmsg :server, "RAW: #{s}" +  data = JSON.parse(URI.decode(s))    logmsg :server, "answer: #{data.to_s}"    # already processing request?    return "locked" if $lock                                             # return @@ -181,6 +186,7 @@ post '/next' do      # (receive post-edit, update models), send next translation    # 5d. actual update    # 6. update database    if data["EDIT"] +    logmsg :server, "#{data.to_s}"      #logmsg :server, params[:example]      rcv_obj = data #JSON.parse params[:example]      # 0. save raw post-edit @@ -189,12 +195,35 @@ post '/next' do      # (receive post-edit, update models), send next translation      reference = ''      if rcv_obj["type"] == 'g'        reference = rcv_obj["target"].join " " +      e = [] +      rcv_obj["target"].each_with_index { |i,j| +        logmsg :server, "before #{i}" +        x = send_recv(:tokenizer, URI.decode(i)) +        prev = x[0] +        x = send_recv(:truecaser, x) +        x[0] = prev if j>0 +        e << x +        logmsg :server, "after #{x}" +      } +      f = [] +      rcv_obj["source_raw"].each { |i| +        f << URI.decode(i) +      } +      logmsg :server, "XXX #{e.to_s}" +      logmsg :server, "XXX #{f.to_s}" +      new_rules = PhrasePhraseExtraction.extract_rules f, e, rcv_obj["align"], true +      f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.rules" +      new_rules = new_rules.map{|r| r.as_trule_string } +      $additional_rules += new_rules +      f.write new_rules.join "\n" +      f.close      else        reference = rcv_obj["post_edit"]      end      $db['post_edits_raw'] << reference      reference = cleanstr(reference) -    $db['feedback'] << data.to_s #params[:example] +    $db['feedback'] << data.to_json #params[:example] +    $db['svg'] << rcv_obj['svg']      $db['durations'] << rcv_obj['duration'].to_i      $db['post_edits_display'] << send_recv(:detokenizer, reference)      # 1. tokenize @@ -320,12 +349,18 @@ post '/next' do      # (receive post-edit, update models), send next translation      obj["progress"]= $db['progress']      obj["raw_source"] = raw_source      w_idx = 0 +    obj["source_groups_raw"] = [] +    obj["source_groups"].each { |i| +      obj["source_groups_raw"] << String.new(i) +    } +    obj["source_groups_raw"][0][0] = source[0]      obj["source_groups"][0][0] = obj["source_groups"][0][0].upcase      obj["source_groups"].each_with_index { |i,j|        prev = obj["source_groups"][j][0]        obj["source_groups"][j] = send_recv(:detokenizer_src, obj["source_groups"][j]).strip        obj["source_groups"][j][0]=prev if j > 0      } +          # save      $db["derivations"] << deriv_s      $db["derivations_proc"] << obj_str @@ -399,6 +434,7 @@ get '/reset_progress' do                                # reset current session    $db['derivations'].clear    $db['derivations_proc'].clear    $db['feedback'].clear +  $db['svg'].clear    $db['progress'] = -1    update_database true    $confirmed = true diff --git a/views/debug.haml b/views/debug.haml index 986f628..013b7b1 100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -33,6 +33,8 @@        %select          - [1000,100,10,1,0.1,0.01,0.001,0.0001,0.00001,0.000001,0.0000001,0.00000001,0.000000001,0.0000000001].each do |i|            %option{:value => i, :onclick => "window.open(\"http://\"+window.location.host+\"/set_sparse_learning_rate/#{i}\");"} #{i} +    %p#svg_b64 #{data2["svg"][data2["progress"]-1]} +    %div#svg      %table        %tr          %td.noborder | 
