From 7e667e541af1532df36ac02c9a32f6da112edbc1 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 25 Nov 2015 14:50:24 +0100 Subject: lots of stuff, just for backup --- derivation_to_json/derivation_to_json.rb | 30 ++++++++++++++++++-------- derivation_to_json/rec.rb | 37 ++++++++++++++++++++++++++++---- help.inc.php | 33 +++++++++++++++++----------- index.php | 1 + interface.php | 9 ++++---- js/common.js | 1 + js/lfpe.js | 32 ++++++++++++++++++++------- server.rb | 36 ++++++++++++++++++++----------- static/main.css | 4 ++-- util/run_beta_test1 | 8 +++++-- 10 files changed, 136 insertions(+), 55 deletions(-) diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb index 3a4eb65..b14b0b5 100755 --- a/derivation_to_json/derivation_to_json.rb +++ b/derivation_to_json/derivation_to_json.rb @@ -42,9 +42,10 @@ class RuleAndSpan end class Rule - attr_accessor :nt, :f, :e, :v, :a, :ha, :source_groups, :target_groups + attr_accessor :nt, :f, :e, :v, :a, :ha, :source_groups, :target_groups, :raw_rule_str def initialize s + @raw_rule_str = s.strip splitpipe(s).each_with_index { |i,j| i = i.strip.lstrip if j == 0 # NT @@ -115,7 +116,8 @@ class Rule end def to_s - "#{@nt} ||| #{@f} ||| #{@e} ||| #{@v} ||| #{@a}\n" + #"#{@nt} ||| #{@f} ||| #{@e} ||| #{@v} ||| #{@a}\n" + "#{raw_rule_str}" end end @@ -138,7 +140,7 @@ def conv_cdec_show_deriv s return a, rules end -def derive span, spans, by_span, o, groups, source +def derive span, by_span, o, groups, source if groups.size==0 || groups.last.size>0 groups << [] end @@ -152,7 +154,7 @@ def derive span, spans, by_span, o, groups, source nt = w.match /\[(\d+)\]/ if nt idx = nt.captures.first.to_i-1 - _ = derive by_span[span.subspans[idx]], spans, by_span, o, groups, source + _ = derive by_span[span.subspans[idx]], by_span, o, groups, source (k+1).upto(a.size-1) { |i| if !a[i].match(/\[(\d+)\]/) && groups.last.size>0 groups << [] @@ -205,7 +207,7 @@ def proc_deriv s source_groups = [] 
spans.each { |span| next if by_span[span].done - derive by_span[span], spans, by_span, so, source_groups, true + derive by_span[span], by_span, so, source_groups, true } spans.each { |s| by_span[s].done = false } @@ -214,7 +216,7 @@ def proc_deriv s groups = [] spans.each { |span| next if by_span[span].done - derive by_span[span], spans, by_span, o, groups, false + derive by_span[span], by_span, o, groups, false } source_rgroups = [] @@ -226,6 +228,7 @@ def proc_deriv s rules_by_span_id[i.first[1]] = i.first[2] } + # make/fake phrase alignment phrase_align = [] count_source = {} count_target = {} @@ -256,12 +259,9 @@ def proc_deriv s end } } - puts add_to.to_s - puts phrase_align.to_s add_to.each { |k| phrase_align[k] << j } - puts phrase_align.to_s end } @@ -281,6 +281,15 @@ def proc_deriv s } } + # span info + span_info = {} + span2id = {} + by_span.each { |k,v| + span_info[v.id] = [k, v.subspans] + span2id[k] = v.id + } + + # final object h = {} h[:phrase_alignment] = phrase_align h[:source_rgroups] = source_rgroups @@ -288,6 +297,8 @@ def proc_deriv s h[:rules_by_span_id] = rules_by_span_id h[:source_groups] = source_groups.map { |a| a.map { |i| i.first }.join " " } h[:target_groups] = groups.map { |a| a.map { |i| i.first }.join " " } + h[:span_info] = span_info + h[:span2id] = span2id return h.to_json end @@ -300,6 +311,7 @@ if __FILE__ == $0 json = proc_deriv(s) obj = JSON.parse(json) STDERR.write "#{json}\n" + puts obj["source_groups"].join " " puts obj["target_groups"].join " " end diff --git a/derivation_to_json/rec.rb b/derivation_to_json/rec.rb index 677a02a..84bdc0d 100755 --- a/derivation_to_json/rec.rb +++ b/derivation_to_json/rec.rb @@ -4,8 +4,8 @@ require 'json' require 'zipf' -before = JSON.parse(ReadFile.read('x.json')) -after = JSON.parse(ReadFile.read('y.json')) +before = JSON.parse(ReadFile.read('in7.json')) +after = JSON.parse(ReadFile.read('out7.json')) alignment = {} after["align"].each { |i| @@ -29,13 +29,26 @@ 
before['source_rgroups'].uniq.each { |k| } } -srg2idx.each_pair { |k,v| +def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false a = [] tgt = [] + target_phrases = [] # alignment seen from target v.each { |i| a << after["source"][i] - tgt << after["target"][alignment[i].first] + target_phrases << alignment[i].first if alignment[i] + } + target_phrases.sort! if !dontsort + target_phrases.each { |j| + tgt << after["target"][j] } + + return a, tgt, target_phrases +end + + +# k is a rule id in after['rules_by_span_id'] +srg2idx.each_pair { |k,v| + a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v rule_before = before['rules_by_span_id'][k.to_s] src_side_before = splitpipe(rule_before)[1] x = src_side_before.split @@ -44,6 +57,22 @@ srg2idx.each_pair { |k,v| puts rule_before puts "#{k} #{a.join " [X] "}" puts tgt.to_s + puts before["span_info"][k.to_s].to_s + puts "target phrases #{target_phrases}" + s = "" + target_phrases.uniq.each { |j| s += after["target"][j]+" " } + puts "S: #{s}" + puts "nothing to do" if before["span_info"][k.to_s][1].size==0 + target_phrase_sub = [] + before["span_info"][k.to_s][1].each { |subspan| + puts subspan.to_s + subid = before["span2id"][subspan.to_s] + puts "subid #{subid}" + puts "XXX #{srg2idx[subid]}" + _, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true + target_phrase_sub << tp + } + puts "targ ph sub #{target_phrase_sub.to_s}" puts "---" puts } diff --git a/help.inc.php b/help.inc.php index 71fad7a..5132620 100644 --- a/help.inc.php +++ b/help.inc.php @@ -1,16 +1,23 @@

Press the 'Next' button to submit your post-edit and to request the next segment for post-edition. Alternatively, in the textual interface, you may just press return when you have finished the post-edit (while the 'Target' text area is in focus).

+

The session can be paused at any time and continued later; however, if you have to pause your session, wait until the activity notification disappears and then press 'Pause', as we are collecting timing information. You may also just reload this page and re-request the segment to reset the timer.

+ +

Please use only a single browser window at a time. Going back to earlier examples is not possible, so please take great care when interacting with the system.

+

Instructions for the graphical interface:

+

To submit a post-edition in the graphical interface, all phrases have to be marked as finished.

@@ -18,11 +25,13 @@ Alternatively, in the textual interface, you may just press return when you fini

Known issues:

diff --git a/index.php b/index.php index 5e574e4..af405e2 100644 --- a/index.php +++ b/index.php @@ -35,6 +35,7 @@ +

diff --git a/interface.php b/interface.php index 92c6012..d8b7a72 100644 --- a/interface.php +++ b/interface.php @@ -20,8 +20,6 @@
- -
@@ -52,8 +50,9 @@ Note that the source word may be distorted.
- - + + + Working, please wait for next translation
@@ -81,7 +80,7 @@ foreach($db->raw_source_segments as $s) { } $translation = ""; if ($i < $db->progress) { - $translation = $db->post_edits_raw[$i]; + $translation = $db->post_edits_display[$i]; } echo "".($i+1).".".$s."".$translation.""; $i += 1; diff --git a/js/common.js b/js/common.js index 4e7d3fe..4af7cd0 100644 --- a/js/common.js +++ b/js/common.js @@ -53,6 +53,7 @@ function CreateCORSRequest(method, url) var xhr = new XMLHttpRequest(); if ("withCredentials" in xhr) { xhr.open(method, url, true); + xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded; charset=UTF-8'); } else { xhr = null; } diff --git a/js/lfpe.js b/js/lfpe.js index 23ea7a9..334fd16 100644 --- a/js/lfpe.js +++ b/js/lfpe.js @@ -102,11 +102,13 @@ function working() button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); target_textarea.setAttribute("disabled", "disabled"); + document.getElementById("reset_button").setAttribute("disabled", "disabled"); DE_locked = true; } function not_working(fadein=true) { + document.getElementById("reset_button").removeAttribute("disabled"); // elements var button = document.getElementById("next"); var pause_button = document.getElementById("pause_button"); @@ -133,6 +135,7 @@ function not_working(fadein=true) // enable buttons document.getElementById("next").removeAttribute("disabled"); document.getElementById("pause_button").removeAttribute("disabled"); + document.getElementById("reset_button").removeAttribute("disabled"); DE_locked = false; } @@ -162,7 +165,7 @@ function Next() var key = document.getElementById("key").value; // url - next_url = base_url+":"+port+"/next?key="+key; + next_url = base_url+":"+port+"/next"; // post edit var post_edit = ''; @@ -174,17 +177,22 @@ function Next() post_edit = trim(send_data["target"].join(" ")); if (DE_target_done.length != DE_target_shapes.length) post_edit = ""; - send_data["post_edit"] = post_edit; + send_data["post_edit"] = encodeURIComponent(post_edit); 
+ send_data['type'] = 'g'; } else { post_edit = trim(target_textarea.value); send_data["post_edit"] = post_edit; + send_data['type'] = 't'; } + send_data["key"] = key; + // send data // ??? if (oov_correct.value=="false" && post_edit != "") { + send_data["EDIT"] = true; send_data["duration"] = Timer.get(); - send_data["source_value"] = source.value; + send_data["source_value"] = encodeURIComponent(source.value); // compose request //next_url += "&example="+encodeURIComponent(source.value)+"%20%7C%7C%7C%20"+encodeURIComponent(post_edit)+"&duration="+Timer.get(); // no change? @@ -192,11 +200,12 @@ function Next() //next_url += "&nochange=1"; send_data["nochange"] = true; } - next_url += "&example="+encodeURIComponent(JSON.stringify(send_data)); + //next_url += "&example="+encodeURIComponent(JSON.stringify(send_data)); // update document overview document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit; // OOV correction mode } else if (oov_correct.value=="true") { + send_data["OOV"] = true; var l = document.getElementById("oov_num_items").value; var src = []; var tgt = []; @@ -216,8 +225,9 @@ function Next() //$("#oov_form").css("display", "none"); $("#oov_form").toggle("blind"); $("#next").val("Next"); - next_url += "&correct="+encodeURIComponent(src.join("\t")) - +"%20%7C%7C%7C%20"+encodeURIComponent(tgt.join("\t")) + send_data["correct"] = src.join("\t") + " ||| " + tgt.join("\t"); + //next_url += "&correct="+encodeURIComponent(src.join("\t")) + //+"%20%7C%7C%7C%20"+encodeURIComponent(tgt.join("\t")) // ??? 
} else { if (source.value != "") { @@ -225,6 +235,7 @@ function Next() target_textarea.removeAttribute("disabled", "disabled"); pause_button.removeAttribute("disabled", "disabled"); button.removeAttribute("disabled", "disabled"); + //document.getElementById("reset_button").removeAttribute("disabled", "disabled"); not_working(); return; } @@ -237,7 +248,7 @@ function Next() } // build request - var xhr = CreateCORSRequest('get', next_url); + var xhr = CreateCORSRequest('post', next_url); if (!xhr) { alert("Error: 2"); // FIXME: do something reasonable } @@ -268,6 +279,7 @@ function Next() $("#target_textarea").attr("rows", 1); button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); + //document.getElementById("reset_button").setAttribute("disabled", "disabled"); if (current_seg_id.value) removeClass(document.getElementById("seg_"+current_seg_id.value), "bold"); @@ -336,6 +348,7 @@ function Next() button.removeAttribute("disabled"); target_textarea.removeAttribute("disabled", "disabled"); pause_button.removeAttribute("disabled", "disabled"); + document.getElementById("reset_button").removeAttribute("disabled"); document.getElementById("seg_"+id).className += " bold"; if (id > 0) { removeClass(document.getElementById("seg_"+(id-1)), "bold"); @@ -359,6 +372,8 @@ function Next() // load data into graphical UI if (ui_type == "g") { DE_init(); + var x = trim(JSON.parse(DE_extract_data())["target"].join(" ")); + last_post_edit.value = x; } // start timer @@ -368,7 +383,7 @@ function Next() xhr.onerror = function() {}; // FIXME: do something reasonable - xhr.send(); // send 'next' request + xhr.send(JSON.stringify(send_data)); // send 'next' request return; } @@ -399,6 +414,7 @@ window.onload = function () document.getElementById("oov_correct").value = false; document.getElementById("displayed_oov_hint").value = false; document.getElementById("init").value = ""; + document.getElementById("reset_button").setAttribute("disabled", 
"disabled"); not_working(); diff --git a/server.rb b/server.rb index 53c764c..a7b86d7 100755 --- a/server.rb +++ b/server.rb @@ -140,16 +140,19 @@ get '/' do return "" # return end -get '/next' do # (receive post-edit, update models), send next translation +post '/next' do # (receive post-edit, update models), send next translation cross_origin + data = JSON.parse(request.body.read) + logmsg :server, "answer: #{data.to_s}" # already processing request? return "locked" if $lock # return $lock = true - key = params[:key] # TODO: do something with it, e.g. simple auth? - if params[:correct] - logmsg :server, "correct: #{params[:correct]}" + key = data['key'] # TODO: do something with it, e.g. simple auth? + if data["OOV"] + #logmsg :server, "correct: #{params[:correct]}" + logmsg :server, "correct: #{data.to_s}" grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" - src, tgt = splitpipe(params[:correct]) + src, tgt = splitpipe(data["correct"]) tgt = cleanstr(tgt) src = src.split("\t").map { |i| i.strip } tgt = tgt.split("\t").map { |i| i.strip } @@ -177,17 +180,23 @@ get '/next' do # (receive post-edit, update models), send next translation # 5c. symmetrize alignment # 5d. actual update # 6. update database - if params[:example] - logmsg :server, params[:example] - rcv_obj = JSON.parse params[:example] + if data["EDIT"] + #logmsg :server, params[:example] + rcv_obj = data #JSON.parse params[:example] # 0. 
save raw post-edit #source, reference = params[:example].strip.split(" ||| ") source = rcv_obj["source_value"] - reference = rcv_obj["target"].join " " + reference = '' + if rcv_obj["type"] == 'g' + reference = rcv_obj["target"].join " " + else + reference = rcv_obj["post_edit"] + end + $db['post_edits_raw'] << reference reference = cleanstr(reference) - $db['feedback'] << params[:example] - $db['post_edits_raw'] << reference.strip + $db['feedback'] << data.to_s #params[:example] $db['durations'] << rcv_obj['duration'].to_i + $db['post_edits_display'] << send_recv(:detokenizer, reference) # 1. tokenize reference = send_recv :tokenizer, reference # 2. truecase @@ -196,7 +205,7 @@ get '/next' do # (receive post-edit, update models), send next translation logmsg :db, "saving processed post-edit" $db['post_edits'] << reference.strip nochange = false - if rcv_obj[:nochange] + if rcv_obj['nochange'] logmsg :server, "no change -> no updates!" nochange = true end @@ -233,7 +242,7 @@ get '/next' do # (receive post-edit, update models), send next translation return {'fin'=>true}.to_json # return elsif !$confirmed \ || ($confirmed && $last_reply && $last_reply!="" \ - && !params[:example] && !$last_reply.to_json["oovs"]) # send last reply + && !data["EDIT"] && !$last_reply.to_json["oovs"]) # send last reply logmsg :server, "locked, re-sending last reply" logmsg :server, "last_reply: '#{$last_reply}'" $lock = false @@ -382,6 +391,7 @@ get '/reset_progress' do # reset current session $db = JSON.parse ReadFile.read DB_FILE $db['post_edits'].clear $db['post_edits_raw'].clear + $db['post_edits_display'].clear $db['mt'].clear $db['mt_raw'].clear $db['updated'].clear diff --git a/static/main.css b/static/main.css index 5e8b161..9d05ec6 100644 --- a/static/main.css +++ b/static/main.css @@ -20,8 +20,8 @@ textarea, input { -webkit-transition: all .5s } -button { - margin: 1em; +.button { + margin: .25em; padding: .25em; background: #fff; font-weight: bold diff --git 
a/util/run_beta_test1 b/util/run_beta_test1 index cfaf009..a0fe20f 100755 --- a/util/run_beta_test1 +++ b/util/run_beta_test1 @@ -1,6 +1,10 @@ #!/bin/zsh -x -./kill; ./kill; ./kill; for i in ../../sessions/product_de-en_beta_test_*; do - ./run_server $(basename $i) &; sleep 300; +cd /fast_scratch/simianer/lfpe/lfpe/util +./kill; ./kill; ./kill; +for i in ../../sessions/product_de-en_beta_test_*; do + echo $i + echo $(basename $i) + ./run_server $(basename $i) &; sleep 600; done -- cgit v1.2.3