diff options
author | Patrick Simianer <p@simianer.de> | 2015-11-25 14:50:24 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-11-25 14:50:24 +0100 |
commit | 7e667e541af1532df36ac02c9a32f6da112edbc1 (patch) | |
tree | 163dd7c4891d23fcf6f0aef56d9e183b72c3b219 | |
parent | 09d2ee0e1e9c36b993c1e3dcf954b0e749cb107c (diff) |
lots of stuff, just for backup
-rwxr-xr-x | derivation_to_json/derivation_to_json.rb | 30 | ||||
-rwxr-xr-x | derivation_to_json/rec.rb | 37 | ||||
-rw-r--r-- | help.inc.php | 33 | ||||
-rw-r--r-- | index.php | 1 | ||||
-rw-r--r-- | interface.php | 9 | ||||
-rw-r--r-- | js/common.js | 1 | ||||
-rw-r--r-- | js/lfpe.js | 32 | ||||
-rwxr-xr-x | server.rb | 36 | ||||
-rw-r--r-- | static/main.css | 4 | ||||
-rwxr-xr-x | util/run_beta_test1 | 8 |
10 files changed, 136 insertions, 55 deletions
diff --git a/derivation_to_json/derivation_to_json.rb b/derivation_to_json/derivation_to_json.rb index 3a4eb65..b14b0b5 100755 --- a/derivation_to_json/derivation_to_json.rb +++ b/derivation_to_json/derivation_to_json.rb @@ -42,9 +42,10 @@ class RuleAndSpan end class Rule - attr_accessor :nt, :f, :e, :v, :a, :ha, :source_groups, :target_groups + attr_accessor :nt, :f, :e, :v, :a, :ha, :source_groups, :target_groups, :raw_rule_str def initialize s + @raw_rule_str = s.strip splitpipe(s).each_with_index { |i,j| i = i.strip.lstrip if j == 0 # NT @@ -115,7 +116,8 @@ class Rule end def to_s - "#{@nt} ||| #{@f} ||| #{@e} ||| #{@v} ||| #{@a}\n" + #"#{@nt} ||| #{@f} ||| #{@e} ||| #{@v} ||| #{@a}\n" + "#{raw_rule_str}" end end @@ -138,7 +140,7 @@ def conv_cdec_show_deriv s return a, rules end -def derive span, spans, by_span, o, groups, source +def derive span, by_span, o, groups, source if groups.size==0 || groups.last.size>0 groups << [] end @@ -152,7 +154,7 @@ def derive span, spans, by_span, o, groups, source nt = w.match /\[(\d+)\]/ if nt idx = nt.captures.first.to_i-1 - _ = derive by_span[span.subspans[idx]], spans, by_span, o, groups, source + _ = derive by_span[span.subspans[idx]], by_span, o, groups, source (k+1).upto(a.size-1) { |i| if !a[i].match(/\[(\d+)\]/) && groups.last.size>0 groups << [] @@ -205,7 +207,7 @@ def proc_deriv s source_groups = [] spans.each { |span| next if by_span[span].done - derive by_span[span], spans, by_span, so, source_groups, true + derive by_span[span], by_span, so, source_groups, true } spans.each { |s| by_span[s].done = false } @@ -214,7 +216,7 @@ def proc_deriv s groups = [] spans.each { |span| next if by_span[span].done - derive by_span[span], spans, by_span, o, groups, false + derive by_span[span], by_span, o, groups, false } source_rgroups = [] @@ -226,6 +228,7 @@ def proc_deriv s rules_by_span_id[i.first[1]] = i.first[2] } + # make/fake phrase alignment phrase_align = [] count_source = {} count_target = {} @@ -256,12 +259,9 @@ def proc_deriv s end } } - puts add_to.to_s - puts phrase_align.to_s add_to.each { |k| phrase_align[k] << j } - puts phrase_align.to_s end } @@ -281,6 +281,15 @@ def proc_deriv s } } + # span info + span_info = {} + span2id = {} + by_span.each { |k,v| + span_info[v.id] = [k, v.subspans] + span2id[k] = v.id + } + + # final object h = {} h[:phrase_alignment] = phrase_align h[:source_rgroups] = source_rgroups @@ -288,6 +297,8 @@ def proc_deriv s h[:rules_by_span_id] = rules_by_span_id h[:source_groups] = source_groups.map { |a| a.map { |i| i.first }.join " " } h[:target_groups] = groups.map { |a| a.map { |i| i.first }.join " " } + h[:span_info] = span_info + h[:span2id] = span2id return h.to_json end @@ -300,6 +311,7 @@ if __FILE__ == $0 json = proc_deriv(s) obj = JSON.parse(json) STDERR.write "#{json}\n" + puts obj["source_groups"].join " " puts obj["target_groups"].join " " end diff --git a/derivation_to_json/rec.rb b/derivation_to_json/rec.rb index 677a02a..84bdc0d 100755 --- a/derivation_to_json/rec.rb +++ b/derivation_to_json/rec.rb @@ -4,8 +4,8 @@ require 'json' require 'zipf' -before = JSON.parse(ReadFile.read('x.json')) -after = JSON.parse(ReadFile.read('y.json')) +before = JSON.parse(ReadFile.read('in7.json')) +after = JSON.parse(ReadFile.read('out7.json')) alignment = {} after["align"].each { |i| @@ -29,13 +29,26 @@ before['source_rgroups'].uniq.each { |k| } } -srg2idx.each_pair { |k,v| +def get_target_phrases_for_source_span before, after, alignment, v, dontsort=false a = [] tgt = [] + target_phrases = [] # alignment seen from target v.each { |i| a << after["source"][i] - tgt << after["target"][alignment[i].first] + target_phrases << alignment[i].first if alignment[i] + } + target_phrases.sort! if !dontsort + target_phrases.each { |j| + tgt << after["target"][j] } + + return a, tgt, target_phrases +end + + +# k is a rule id in after['rules_by_span_id'] +srg2idx.each_pair { |k,v| + a, tgt, target_phrases = get_target_phrases_for_source_span before, after, alignment, v rule_before = before['rules_by_span_id'][k.to_s] src_side_before = splitpipe(rule_before)[1] x = src_side_before.split @@ -44,6 +57,22 @@ srg2idx.each_pair { |k,v| puts rule_before puts "#{k} #{a.join " [X] "}" puts tgt.to_s + puts before["span_info"][k.to_s].to_s + puts "target phrases #{target_phrases}" + s = "" + target_phrases.uniq.each { |j| s += after["target"][j]+" " } + puts "S: #{s}" + puts "nothing to do" if before["span_info"][k.to_s][1].size==0 + target_phrase_sub = [] + before["span_info"][k.to_s][1].each { |subspan| + puts subspan.to_s + subid = before["span2id"][subspan.to_s] + puts "subid #{subid}" + puts "XXX #{srg2idx[subid]}" + _, _, tp = get_target_phrases_for_source_span before, after, alignment, srg2idx[subid], true + target_phrase_sub << tp + } + puts "targ ph sub #{target_phrase_sub.to_s}" puts "---" puts } diff --git a/help.inc.php b/help.inc.php index 71fad7a..5132620 100644 --- a/help.inc.php +++ b/help.inc.php @@ -1,16 +1,23 @@ <p style="margin-top:-1em">Press the 'Next' button to <strong>submit</strong> your post-edit and to request the next segment for post-edition. Alternatively, in the textual interface, you may just press return when you finished the post-edit ('Target' text area is in focus).</p> + <p>The session can be paused at any time and continued later; However, if you have to pause your session, wait until the activity notification disappears and then press 'Pause', as we are collecting timing information. You may also just reload this site and re-request the segment to reset the timer.</p> + + <p>Please use only a <strong>single browser window</strong> at the same time. Going back to earlier examples is not possible, please take great care when interacting with the system.</p> + <p><span style="border-bottom:1px solid #ccc">Instructions for the graphical interface:</span></p> +<p>To submit a post-edition in the graphical interface all phrases have to be marked as finished.</p> <ul> - <li><strong>Moving around:</strong> Press <strong>'S'</strong>, then move around using the arrow keys.</li> - <li><strong>Reordering of target phrases:</strong> Press <strong>'M'</strong>, then use the arrow keys.</li> - <li><strong>Adding/removing alignments:</strong> Select a source phrase by left-clicking on its box, then click on a suitable target phrase (click selected source phrase again to cancel).</li> - <li><strong>Edit text:</strong> Double click on text, click outside once or press return to stop editing. Alternatively, press <strong>'E'</strong> to edit the contents of the current phrase.</li> - <li><strong>Adding target phrases:</strong> Click '+' button to add a new phrase at the end of the translation. To add a phrase right next to the currently selected phrase just press <strong>'A'</strong>.</li> - <li><strong>Removing target phrases:</strong> Alt-click a phrase and press <strong>'D'</strong> key. Selecting multiple phrases for deletion is also possible. To delete the currently selected phrase just press <strong>'D'</strong>.</li> + <li><strong>Moving around:</strong> Press <strong>'S'</strong>, then select phrases using the arrow keys.</li> + <li><strong>Editing text:</strong> Double click on a phrase or press <strong>'E'</strong> to edit the contents of the current phrase. Press 'Return' to save.</li> + <li><strong>Reordering of target phrases:</strong> Press <strong>'M'</strong>, then use the arrow keys to move the selected phrase. Press 'Return' to fix the position.</li> + <li><strong>Mark phrase as finished:</strong> Press <strong>'Return'</strong> to mark phrases as finished (press 'Return' again to undo). Moving, editing or deleting of finished phrases is not possible.</li> + <li><strong>Adding target phrases:</strong> To add a phrase right next to the currently selected one press <strong>'A'</strong>.</li> + <li><strong>Removing target phrases:</strong> Press <strong>'D'</strong> to delete the currently selected phrase.</li> + <li><strong>Adding/removing alignments:</strong> Select a source phrase by clicking on it, then click on a suitable target phrase to connect or disconnect. Click the selected source phrase again to cancel.</li> + <li><strong>Undo:</strong> Press <strong>'U'</strong> to undo alignments, text edits and deletion of phrases.</li> <li><strong>Reset:</strong> Click 'Reset' button to start from scratch.</li> </ul> @@ -18,11 +25,13 @@ Alternatively, in the textual interface, you may just press return when you fini <p><span style="border-bottom:1px solid #ccc">Known issues:</span></p> <ul> -<!-- <li>collision detection is not perfect, fast dragging may lead to strange behavior</li>--> -<!-- <li>graphical editor can still be used while faded</li>--> - <li>The width of the canvas of graphical editor may be to small when adding a lot of phrases</li> -<!-- <li>in-line editor background is red</li>--> - <li>The in-line editor may change its height</li> - <li>When editing phrases with no content input box is lower than normal</li> +<!-- <li>Collision detection is not perfect, fast dragging may lead to strange behavior.</li>--> +<!-- <li>Graphical editor can still be used while faded.</li>--> + <li>The width of the canvas of graphical editor may be to small when adding a lot of phrases.</li> + <li>The in-line editor may change height.</li> + <li>When editing phrases that have no contents, the input box is lower than normal.</li> + <li>Post-edits in the session overview may be partially tokenized.</li> + <li>Horizontal scrollbar doesn't follow highlighted phrase.</li> + <li>Mouseover is not detected for undoing.</li> </ul> @@ -35,6 +35,7 @@ <option value="product_de-en_beta_test_D" onclick="document.getElementById('key').value=this.value;">D</option> <option value="product_de-en_beta_test_D_sparse" onclick="document.getElementById('key').value=this.value;">D [sparse]</option> <option value="toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option> + <option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example [new]</option> </select> </p> </div> diff --git a/interface.php b/interface.php index 92c6012..d8b7a72 100644 --- a/interface.php +++ b/interface.php @@ -20,8 +20,6 @@ <!-- Derivation editor --> <div id="derivation_editor"> <div id="holder"><img style="margin:.4em" src="static/placeholder.png" /></div> - <input type="button" value="+" onclick="DE_add_object()" /> - <input type="button" value="Reset" onclick="DE_init();" /> </div> <!-- /Derivation editor--> @@ -52,8 +50,9 @@ Note that the source word may be distorted.</span> <!-- Buttons --> <div> - <button id="pause_button" type="button" onclick="pause()">Pause</button> - <button id="next" type="button" onclick="Next();">Start/Continue</button> + <button id="pause_button" class='button' type="button" onclick="pause()">Pause</button> + <button id="reset_button" class='button' type="button" onclick="DE_init()">Reset</button> + <button id="next" type="button" class='button' onclick="Next();">Start/Continue</button> <span id="status"><strong>Working, please wait for next translation</strong> <img src="static/ajax-loader-large.gif" width="20px" /></span> </div> <!-- /Buttons --> @@ -81,7 +80,7 @@ foreach($db->raw_source_segments as $s) { } $translation = ""; if ($i < $db->progress) { - $translation = $db->post_edits_raw[$i]; + $translation = $db->post_edits_display[$i]; } echo "<tr class='".$class."' id='seg_".$i."'><td class='num'>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'>".$translation."</td></tr>"; $i += 1; diff --git a/js/common.js b/js/common.js index 4e7d3fe..4af7cd0 100644 --- a/js/common.js +++ b/js/common.js @@ -53,6 +53,7 @@ function CreateCORSRequest(method, url) var xhr = new XMLHttpRequest(); if ("withCredentials" in xhr) { xhr.open(method, url, true); + xhr.setRequestHeader('Content-type', 'application/x-www-form-urlencoded; charset=UTF-8'); } else { xhr = null; } @@ -102,11 +102,13 @@ function working() button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); target_textarea.setAttribute("disabled", "disabled"); + document.getElementById("reset_button").setAttribute("disabled", "disabled"); DE_locked = true; } function not_working(fadein=true) { + document.getElementById("reset_button").removeAttribute("disabled"); // elements var button = document.getElementById("next"); var pause_button = document.getElementById("pause_button"); @@ -133,6 +135,7 @@ function not_working(fadein=true) // enable buttons document.getElementById("next").removeAttribute("disabled"); document.getElementById("pause_button").removeAttribute("disabled"); + document.getElementById("reset_button").removeAttribute("disabled"); DE_locked = false; } @@ -162,7 +165,7 @@ function Next() var key = document.getElementById("key").value; // url - next_url = base_url+":"+port+"/next?key="+key; + next_url = base_url+":"+port+"/next"; // post edit var post_edit = ''; @@ -174,17 +177,22 @@ function Next() post_edit = trim(send_data["target"].join(" ")); if (DE_target_done.length != DE_target_shapes.length) post_edit = ""; - send_data["post_edit"] = post_edit; + send_data["post_edit"] = encodeURIComponent(post_edit); + send_data['type'] = 'g'; } else { post_edit = trim(target_textarea.value); send_data["post_edit"] = post_edit; + send_data['type'] = 't'; } + send_data["key"] = key; + // send data // ??? if (oov_correct.value=="false" && post_edit != "") { + send_data["EDIT"] = true; send_data["duration"] = Timer.get(); - send_data["source_value"] = source.value; + send_data["source_value"] = encodeURIComponent(source.value); // compose request //next_url += "&example="+encodeURIComponent(source.value)+"%20%7C%7C%7C%20"+encodeURIComponent(post_edit)+"&duration="+Timer.get(); // no change? @@ -192,11 +200,12 @@ function Next() //next_url += "&nochange=1"; send_data["nochange"] = true; } - next_url += "&example="+encodeURIComponent(JSON.stringify(send_data)); + //next_url += "&example="+encodeURIComponent(JSON.stringify(send_data)); // update document overview document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit; // OOV correction mode } else if (oov_correct.value=="true") { + send_data["OOV"] = true; var l = document.getElementById("oov_num_items").value; var src = []; var tgt = []; @@ -216,8 +225,9 @@ function Next() //$("#oov_form").css("display", "none"); $("#oov_form").toggle("blind"); $("#next").val("Next"); - next_url += "&correct="+encodeURIComponent(src.join("\t")) - +"%20%7C%7C%7C%20"+encodeURIComponent(tgt.join("\t")) + send_data["correct"] = src.join("\t") + " ||| " + tgt.join("\t"); + //next_url += "&correct="+encodeURIComponent(src.join("\t")) + //+"%20%7C%7C%7C%20"+encodeURIComponent(tgt.join("\t")) // ??? } else { if (source.value != "") { @@ -225,6 +235,7 @@ function Next() target_textarea.removeAttribute("disabled", "disabled"); pause_button.removeAttribute("disabled", "disabled"); button.removeAttribute("disabled", "disabled"); + //document.getElementById("reset_button").removeAttribute("disabled", "disabled"); not_working(); return; } @@ -237,7 +248,7 @@ function Next() } // build request - var xhr = CreateCORSRequest('get', next_url); + var xhr = CreateCORSRequest('post', next_url); if (!xhr) { alert("Error: 2"); // FIXME: do something reasonable } @@ -268,6 +279,7 @@ function Next() $("#target_textarea").attr("rows", 1); button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); + //document.getElementById("reset_button").setAttribute("disabled", "disabled"); if (current_seg_id.value) removeClass(document.getElementById("seg_"+current_seg_id.value), "bold"); @@ -336,6 +348,7 @@ function Next() button.removeAttribute("disabled"); target_textarea.removeAttribute("disabled", "disabled"); pause_button.removeAttribute("disabled", "disabled"); + document.getElementById("reset_button").removeAttribute("disabled"); document.getElementById("seg_"+id).className += " bold"; if (id > 0) { removeClass(document.getElementById("seg_"+(id-1)), "bold"); @@ -359,6 +372,8 @@ function Next() // load data into graphical UI if (ui_type == "g") { DE_init(); + var x = trim(JSON.parse(DE_extract_data())["target"].join(" ")); + last_post_edit.value = x; } // start timer @@ -368,7 +383,7 @@ function Next() xhr.onerror = function() {}; // FIXME: do something reasonable - xhr.send(); // send 'next' request + xhr.send(JSON.stringify(send_data)); // send 'next' request return; } @@ -399,6 +414,7 @@ window.onload = function () document.getElementById("oov_correct").value = false; document.getElementById("displayed_oov_hint").value = false; document.getElementById("init").value = ""; + document.getElementById("reset_button").setAttribute("disabled", "disabled"); not_working(); @@ -140,16 +140,19 @@ get '/' do return "" # return end -get '/next' do # (receive post-edit, update models), send next translation +post '/next' do # (receive post-edit, update models), send next translation cross_origin + data = JSON.parse(request.body.read) + logmsg :server, "answer: #{data.to_s}" # already processing request? return "locked" if $lock # return $lock = true - key = params[:key] # TODO: do something with it, e.g. simple auth? - if params[:correct] - logmsg :server, "correct: #{params[:correct]}" + key = data['key'] # TODO: do something with it, e.g. simple auth? + if data["OOV"] + #logmsg :server, "correct: #{params[:correct]}" + logmsg :server, "correct: #{data.to_s}" grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" - src, tgt = splitpipe(params[:correct]) + src, tgt = splitpipe(data["correct"]) tgt = cleanstr(tgt) src = src.split("\t").map { |i| i.strip } tgt = tgt.split("\t").map { |i| i.strip } @@ -177,17 +180,23 @@ get '/next' do # (receive post-edit, update models), send next translation # 5c. symmetrize alignment # 5d. actual update # 6. update database - if params[:example] - logmsg :server, params[:example] - rcv_obj = JSON.parse params[:example] + if data["EDIT"] + #logmsg :server, params[:example] + rcv_obj = data #JSON.parse params[:example] # 0. save raw post-edit #source, reference = params[:example].strip.split(" ||| ") source = rcv_obj["source_value"] - reference = rcv_obj["target"].join " " + reference = '' + if rcv_obj["type"] == 'g' + reference = rcv_obj["target"].join " " + else + reference = rcv_obj["post_edit"] + end + $db['post_edits_raw'] << reference reference = cleanstr(reference) - $db['feedback'] << params[:example] - $db['post_edits_raw'] << reference.strip + $db['feedback'] << data.to_s #params[:example] $db['durations'] << rcv_obj['duration'].to_i + $db['post_edits_display'] << send_recv(:detokenizer, reference) # 1. tokenize reference = send_recv :tokenizer, reference # 2. truecase @@ -196,7 +205,7 @@ get '/next' do # (receive post-edit, update models), send next translation logmsg :db, "saving processed post-edit" $db['post_edits'] << reference.strip nochange = false - if rcv_obj[:nochange] + if rcv_obj['nochange'] logmsg :server, "no change -> no updates!" nochange = true end @@ -233,7 +242,7 @@ get '/next' do # (receive post-edit, update models), send next translation return {'fin'=>true}.to_json # return elsif !$confirmed \ || ($confirmed && $last_reply && $last_reply!="" \ - && !params[:example] && !$last_reply.to_json["oovs"]) # send last reply + && !data["EDIT"] && !$last_reply.to_json["oovs"]) # send last reply logmsg :server, "locked, re-sending last reply" logmsg :server, "last_reply: '#{$last_reply}'" $lock = false @@ -382,6 +391,7 @@ get '/reset_progress' do # reset current session $db = JSON.parse ReadFile.read DB_FILE $db['post_edits'].clear $db['post_edits_raw'].clear + $db['post_edits_display'].clear $db['mt'].clear $db['mt_raw'].clear $db['updated'].clear diff --git a/static/main.css b/static/main.css index 5e8b161..9d05ec6 100644 --- a/static/main.css +++ b/static/main.css @@ -20,8 +20,8 @@ textarea, input { -webkit-transition: all .5s } -button { - margin: 1em; +.button { + margin: .25em; padding: .25em; background: #fff; font-weight: bold diff --git a/util/run_beta_test1 b/util/run_beta_test1 index cfaf009..a0fe20f 100755 --- a/util/run_beta_test1 +++ b/util/run_beta_test1 @@ -1,6 +1,10 @@ #!/bin/zsh -x -./kill; ./kill; ./kill; for i in ../../sessions/product_de-en_beta_test_*; do - ./run_server $(basename $i) &; sleep 300; +cd /fast_scratch/simianer/lfpe/lfpe/util +./kill; ./kill; ./kill; +for i in ../../sessions/product_de-en_beta_test_*; do + echo $i + echo $(basename $i) + ./run_server $(basename $i) &; sleep 600; done |