diff options
-rw-r--r-- | common.js | 58 | ||||
-rw-r--r-- | interface.php | 51 | ||||
-rw-r--r-- | lfpe.js | 325 | ||||
-rwxr-xr-x | server.rb | 57 |
4 files changed, 308 insertions, 183 deletions
diff --git a/common.js b/common.js new file mode 100644 index 0000000..11dcdba --- /dev/null +++ b/common.js @@ -0,0 +1,58 @@ +var data, + ui_type; + + +/* + * hacky way to remove class from node + * + */ +function removeClass(node, className) +{ + node.className = + node.className.replace(" "+className,''); + node.className = + node.className.replace(" "+className,''); // ??? + + return false; +} + +/* + * + * + */ +function toggleDisplay(node) +{ + if (node.style.display=='none') { + node.style.display = 'block'; + } else { + node.style.display = 'none'; + } + + return false; +} + +/* + * trim string + * + */ +function trim(s) +{ + return s.replace(/^\s+|\s+$/g, ''); +} + +/* + * cross-site request + * + */ +function CreateCORSRequest(method, url) +{ + var xhr = new XMLHttpRequest(); + if ("withCredentials" in xhr) { + xhr.open(method, url, true); + } else { + xhr = null; + } + + return xhr; +} + diff --git a/interface.php b/interface.php index 19a8cbd..df82d97 100644 --- a/interface.php +++ b/interface.php @@ -2,26 +2,47 @@ <head> <meta charset="utf-8" /> <title>Post-editing application (Session: #<?php echo $_GET["key"]; ?>)</title> + <script src="common.js"></script> <script src="lfpe.js"></script> <link rel="stylesheet" type="text/css" href="lfpe.css" /> + <script src="//ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script> + <script src="derivation_editor/raphael.js" type="text/javascript" charset="utf-8"></script> + <script src="https://raw.githubusercontent.com/marmelab/Raphael.InlineTextEditing/fd578f0eddd4172e6d9b3fde4cb67576cf546dc1/raphael.inline_text_editing.js" charset="utf-8"></script> + <script src="derivation_editor/edit.js" charset="utf-8"></script> </head> -<body onload="init()"> +<body> <?php include("header.php"); ?> -<!-- Source and target --> -<table> -<tr> - <td align="right">Source:</td> - <td><textarea id="raw_source_textarea" name="source" cols="80" rows="1" disabled></textarea></td> -</tr> -<tr> - <td align="right">Target:</td> - <td><textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="catch_return(event)"></textarea></td> -</tr> -</table> -<!-- /Source and target --> +<!-- Derivation editor --> +<div id="derivation_editor"> + <div id="holder"><img style="margin:.4em" src="placeholder.png" /></div> + <input type="button" value="+" onClick="add_obj()" /> + <input type="button" value="Reset" onClick="reset_derivation_edtior(true);" /> +</div> +<!-- /Derivation editor--> + +<!-- Source and target textboxes --> +<div id="textboxes"> + <table> + <tr> + <td align="right">Source:</td> + <td><textarea id="raw_source_textarea" name="source" cols="80" rows="1" disabled></textarea></td> + </tr> + <tr> + <td align="right">Target:</td> + <td><textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="catch_return(event)"></textarea></td> + </tr> + </table> +</div> +<div id="oov_form"> + <p class="small" style="margin-bottom:0"><strong>Unknown words:</strong><br /> + Please enter a translation for each source word.</p> + <div id="oov_fields"></div> +</div> +<!-- /Source and target textboxes --> + <!-- Buttons --> <div> @@ -61,7 +82,7 @@ foreach($db->raw_source_segments as $s) { <!-- /Session overview --> <!-- Help --> -<button id="help_button" onclick="toggleDisplay(document.getElementById('help'));">Help</button> +<button id="help_button" onclick="toggleDisplay(document.getElementById('help'));">Help</button> <div id="help" style="display:none"> <p>Press the 'Next' button to submit your post-edit and to request the next segment for translation. Alternatively, just press enter when you finished the post-edit and the 'Target' text area is in focus. Warning: Past post-edits can not be altered. The session can be stopped at any time and continued later; However, if you have to pause your session, wait until the activity notification disappears and then press 'Pause', as we are collecting timing information. You may also just reload this site and re-request the next segment upon your return. Please only use <em>one</em> browser window at once. Going back to earlier examples is not possible, please take great care when interacting with the system.<br/> @@ -86,5 +107,7 @@ The interface was only tested with Firefox 31.</p> <textarea style="display:none" id="displayed_oov_hint">0</textarea> <textarea style="display:none" id="port"><?php echo $db->port; ?></textarea> <textarea style="display:none" id="init">0</textarea> +<textarea style="display:none" id="ui_type"><?php echo $_GET["ui_type"]; ?></textarea> +<textarea style="display:none" id="data"></textarea> <!-- /Data --> @@ -29,70 +29,6 @@ var Timer = { } /* - * init site - * - */ -function init() -{ - document.getElementById("target_textarea").value = ""; - document.getElementById("raw_source_textarea").value = ""; - document.getElementById("source").value = ""; - document.getElementById("current_seg_id").value = ""; - document.getElementById("paused").value = ""; - document.getElementById("oov_correct").value = false; - document.getElementById("displayed_oov_hint").value = false; - document.getElementById("init").value = ""; - document.getElementById("target_textarea").setAttribute("disabled", "disabled"); - document.getElementById("next").removeAttribute("disabled"); - document.getElementById("pause_button").removeAttribute("disabled"); - - return false; -} - -/* - * cross-site request - * - */ -function CreateCORSRequest(method, url) -{ - var xhr = new XMLHttpRequest(); - if ("withCredentials" in xhr) { - xhr.open(method, url, true); - } else { - xhr = null; - } - - return xhr; -} - -/* - * no newline on return in textarea - * - */ -function catch_return(e) -{ - if (e.keyCode == 13) { - e.preventDefault(); - Next(); - } - - return false; -} - -/* - * check oov correction input - * - */ -function check_oov_correction() -{ - var need = trim(document.getElementById("raw_source_textarea").value).split(";").length; - var a = trim(document.getElementById("target_textarea").value).split(";"); - a = a.filter(function(i){ return i!=""; }) - - return need==a.length; -} - -/* * pause/unpause timer * */ @@ -103,6 +39,7 @@ function pause() var next_button = document.getElementById("next"); var target_textarea = document.getElementById("target_textarea") var initialized = document.getElementById("init"); + if (paused.value == 0) { button.innerHTML = "Unpause"; paused.value = 1; @@ -121,15 +58,15 @@ function pause() } /* - * hacky way to remove class from node + * no newline on return in textarea * */ -function removeClass(node, className) +function catch_return(e) { - node.className = - node.className.replace(" "+className,''); - node.className = - node.className.replace(" "+className,''); // ??? + if (e.keyCode == 13) { + e.preventDefault(); + Next(); + } return false; } @@ -138,24 +75,51 @@ function removeClass(node, className) * * */ -function toggleDisplay(node) +function working() { - if (node.style.display=='none') { - node.style.display = 'block'; - } else { - node.style.display = 'none'; - } + // elements + var button = document.getElementById("next"); + var pause_button = document.getElementById("pause_button"); + var target_textarea = document.getElementById("target_textarea") + var raw_source_textarea = document.getElementById("raw_source_textarea"); + var current_seg_id = document.getElementById("current_seg_id"); + var source = document.getElementById("source"); + var status = document.getElementById("status"); + var oov_correct = document.getElementById("oov_correct"); + var last_post_edit = document.getElementById("last_post_edit"); - return false; + // show 'working' message + status.style.display = "block"; + + // disable button and textarea + button.setAttribute("disabled", "disabled"); + pause_button.setAttribute("disabled", "disabled"); + target_textarea.setAttribute("disabled", "disabled"); } /* - * trim string + * * */ -function trim(s) +function not_working() { - return s.replace(/^\s+|\s+$/g, ''); + // elements + var button = document.getElementById("next"); + var pause_button = document.getElementById("pause_button"); + var target_textarea = document.getElementById("target_textarea") + var raw_source_textarea = document.getElementById("raw_source_textarea"); + var current_seg_id = document.getElementById("current_seg_id"); + var source = document.getElementById("source"); + var status = document.getElementById("status"); + var oov_correct = document.getElementById("oov_correct"); + var last_post_edit = document.getElementById("last_post_edit"); + + // hide 'working' message + status.style.display = "none"; + + // enable buttons + document.getElementById("next").removeAttribute("disabled"); + document.getElementById("pause_button").removeAttribute("disabled"); } /* @@ -175,19 +139,28 @@ function Next() var oov_correct = document.getElementById("oov_correct"); var last_post_edit = document.getElementById("last_post_edit"); - // disable button and textarea - button.setAttribute("disabled", "disabled"); - pause_button.setAttribute("disabled", "disabled"); - target_textarea.setAttribute("disabled", "disabled"); + working(); // get metadata stored in DOM + var base_url = "http://coltrane.cl.uni-heidelberg.de"; var port = document.getElementById("port").value; - var base_url = "http://coltrane.cl.uni-heidelberg.de:"+port; var key = document.getElementById("key").value; - next_url = base_url+"/next?key="+key; + // url + next_url = base_url+":"+port+"/next?key="+key; - var post_edit = trim(target_textarea.value); + // post edit + var post_edit = ''; + + // extract data from interfaces + if (ui_type == 'g') { + post_edit = JSON.parse(extract_data())["target"].join(" ") + } else { + post_edit = trim(target_textarea.value); + } + + // send data + // ??? if (oov_correct.value=="false" && post_edit != "") { // compose request next_url += "&example="+encodeURIComponent(source.value)+"%20%7C%7C%7C%20"+encodeURIComponent(post_edit)+"&duration="+Timer.get(); @@ -197,15 +170,28 @@ function Next() } // update document overview document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit; + // OOV correction mode } else if (oov_correct.value=="true") { - if (!check_oov_correction()) { - alert("Please provide translations for each word in the 'Source' text area, separated by ';'."); - target_textarea.removeAttribute("disabled", "disabled"); - pause_button.removeAttribute("disabled", "disabled"); - button.removeAttribute("disabled", "disabled"); - return; - } - next_url += "&correct="+encodeURIComponent(raw_source_textarea.value)+"%20%7C%7C%7C%20"+encodeURIComponent(post_edit) + var l = document.getElementById("oov_fields").children.length; + var src = []; + var tgt = []; + for (var i=0; i<l/2; i++) { + src.push(trim(document.getElementById("oov_src"+i).value)); + tgt.push(trim(document.getElementById("oov_tgt"+i).value)); + if (tgt[tgt.length-1] == "") { // empty correction + alert("Please provide translations for all OOV words."); + not_working(); + + return; + } + } + var l = document.getElementById("oov_fields").children.length; + for (var i=0; i<l; i++) + { document.getElementById("oov_fields").children[0].remove(); } + $("#oov_form").css("display", "none"); + next_url += "&correct="+encodeURIComponent(src.join("\t")) + +"%20%7C%7C%7C%20"+encodeURIComponent(tgt.join("\t")) + // ??? } else { if (source.value != "") { alert("Please provide a post-edit."); @@ -216,12 +202,9 @@ function Next() } } - // show 'working' message - status.style.display = "block"; - // confirm to server if (document.getElementById("init").value != "") { - var xhr_confirm = CreateCORSRequest('get', base_url+"/confirm"); + var xhr_confirm = CreateCORSRequest('get', base_url+":"+port+"/confirm"); xhr_confirm.send(); // FIXME: handle errors } @@ -236,62 +219,55 @@ function Next() document.getElementById("init").value = 1; // for pause() // translation system is currently handling a request // FIXME: maybe poll server for result? - if (xhr.responseText == "locked") { - alert("Translation system is locked, try again in a moment (reload page and click 'Start/Continue' again)."); - status.style.display = "none"; + if (xhr.responseText == "locked") { + alert("Translation system is locked, try again in a moment (reload page and click 'Start/Continue' again)."); + not_working(); - return; - } - var x = xhr.responseText.split("\t"); - if (x == "fi") { // done -> hide/disable functional elements + return; + } + + data = JSON.parse(xhr.responseText) + document.getElementById("data").value = xhr.responseText; + + // done, disable interface + if (data["fin"]) { raw_source_textarea.setAttribute("disabled", "disabled"); target_textarea.setAttribute("disabled", "disabled"); status.style.display = "none"; button.innerHTML = "Session finished, thank you!"; button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); - removeClass(document.getElementById("seg_"+current_seg_id.value), "bold"); - } else { - // got response: OOV\tseg id\ttoken_1\ttoken_2\t... - // 0 1 2 3 ... - if (x[0] == "OOV") { - var s = ""; - for (var i=2; i < x.length; i++) { - s += x[i].substr(1,x[i].length-2); - if (i+1 < x.length) { - s += "; "; - } - raw_source_textarea.value = s; - } - // update interface - status.style.display = "none"; - button.innerHTML = "Correct"; - button.removeAttribute("disabled"); - target_textarea.removeAttribute("disabled", "disabled"); - pause_button.removeAttribute("disabled", "disabled"); - target_textarea.value = ""; - target_textarea.focus(); - target_textarea.selectionStart = 0; - target_textarea.selectionEnd = 0; - oov_correct.value = true; - var id = x[1]; - document.getElementById("seg_"+id).className += " bold"; - if (id > 0) { - removeClass(document.getElementById("seg_"+(id-1)), "bold"); - } - if (document.getElementById("displayed_oov_hint").value == "false") { - alert("Please translate the following words (separated by semicolons) to enable translation of the next sentence. Source words are always in lower case. Use correct casing for suggested translation."); - document.getElementById("displayed_oov_hint").value = true; - } - - return; + if (current_seg_id.value) + removeClass(document.getElementById("seg_"+current_seg_id.value), "bold"); + + return; + + // enter OOV correct mode + } else if (data["oovs"]) { + var append_to = document.getElementById("oov_fields"); + for (var i=0; i<data["oovs"].length; i++) { + var node_src = document.createElement("input"); + var node_tgt = document.createElement("input"); + node_src.type = "text"; + node_tgt.type = "text"; + node_src.id = "oov_src"+i; + node_tgt.id = "oov_tgt"+i; + node_src.value = data["oovs"][i]; + node_src.setAttribute("disabled", "disabled"); + append_to.appendChild(node_src); + append_to.appendChild(node_tgt); } - // got response: seg id\tsource\ttranslation\traw source - // 0 1 2 3 - var id = x[0]; - var src = x[1]; - var translation = x[2]; - var raw_source = x[3]; + oov_correct.value = true; + + $("#oov_form").css("display", "block"); + not_working(); + + // translation mode + } else { + var id = data["progress"]; + var src = data["source"]; + var translation = data["transl_detok"]; + var raw_source = data["raw_source"]; // update interface oov_correct.value = false; @@ -318,19 +294,64 @@ function Next() last_post_edit.value = translation; // confirm to server - //var xhr_confirm = CreateCORSRequest('get', base_url+"/confirm"); - //xhr_confirm.send(); // FIXME: handle errors + var xhr_confirm = CreateCORSRequest('get', base_url+":"+port+"/confirm"); + xhr_confirm.send(); // FIXME: handle errors + // load data into graphical UI + if (ui_type == "g") { + load_data(); + } + + // start timer Timer.start(); } }; - xhr.onerror = function() { - // FIXME: do something reasonable - }; + xhr.onerror = function() {}; // FIXME: do something reasonable xhr.send(); // send 'next' request return; } +/* + * init text interface + * + */ +function init_text_editor() +{ + document.getElementById("target_textarea").value = ""; + document.getElementById("raw_source_textarea").value = ""; + document.getElementById("target_textarea").setAttribute("disabled", "disabled"); + + return false; +} + +/* + * init site + * + */ +window.onload = function () +{ + // reset vars + document.getElementById("source").value = ""; + document.getElementById("current_seg_id").value = ""; + document.getElementById("paused").value = ""; + document.getElementById("oov_correct").value = false; + document.getElementById("displayed_oov_hint").value = false; + document.getElementById("init").value = ""; + + not_working(); + + ui_type = document.getElementById("ui_type").value; + + // graphical derivation editor + if (ui_type == "g") { + document.getElementById("derivation_editor").style.display = "block"; + // text based editor + } else { + init_text_editor(); + document.getElementById("textboxes").style.display = "block"; + } +}; + @@ -7,6 +7,7 @@ require 'nanomsg' require 'zipf' require 'json' require 'haml' +require_relative './derivation_to_json/derivation_to_json' # ############################################################################# # Load configuration file and setup global variables @@ -24,11 +25,12 @@ end # ############################################################################# # Daemons # ############################################################################# +DIR="/fast_scratch/simianer/lfpe/" $daemons = { - :tokenizer => "/fast_scratch/simianer/lfpe/lfpe/util/wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}", - :detokenizer => "/fast_scratch/simianer/lfpe/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}", - :truecaser => "/fast_scratch/simianer/lfpe/lfpe/util/wrapper.rb -a truecase -S '__ADDR__' -e #{EXTERNAL} -t #{SESSION_DIR}/truecase.model", - :dtrain => "#{CDEC}/training/dtrain/dtrain_net_interface -c #{SESSION_DIR}/dtrain.ini -d #{WORK_DIR}/dtrain.debug.json -o #{WORK_DIR}/weights -a '__ADDR__'", + :tokenizer => "#{DIR}/lfpe/util/wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}", + :detokenizer => "#{DIR}/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}", + :truecaser => "#{DIR}/lfpe/util/wrapper.rb -a truecase -S '__ADDR__' -e #{EXTERNAL} -t #{SESSION_DIR}/truecase.model", + :dtrain => "#{CDEC}/training/dtrain/dtrain_net_interface -c #{SESSION_DIR}/dtrain.ini -d #{WORK_DIR}/dtrain.debug.json -o #{WORK_DIR}/weights -a '__ADDR__' -E", :extractor => "python -m cdec.sa.extract -c #{SESSION_DIR}/sa.ini --online -u -S '__ADDR__'", :aligner_fwd => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'", :aligner_back => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'", @@ -47,7 +49,6 @@ set :max_age, "1728000" set :expose_headers, ['Content-Type'] set :public_folder, File.dirname(__FILE__) + '/static' - # ############################################################################# # Helper functions # ############################################################################# @@ -144,8 +145,8 @@ get '/next' do # (receive post-edit, update models), send next translation grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" src, tgt = splitpipe(params[:correct]) tgt = cleanstr(tgt) - src = src.split(';').map { |i| i.strip } - tgt = tgt.split(';').map { |i| i.strip } + src = src.split("\t").map { |i| i.strip } + tgt = tgt.split("\t").map { |i| i.strip } src.each_with_index { |s,i| next if s==''||tgt[i]=='' a = "" @@ -212,7 +213,7 @@ get '/next' do # (receive post-edit, update models), send next translation if !source # input is done -> displays 'Thank you!' logmsg :server, "end of input, sending 'fi'" $lock = false - return "fi" # return + return {'fin'=>true}.to_json # return elsif !$confirmed logmsg :server, "locked, re-sending last reply" $lock = false @@ -231,7 +232,11 @@ get '/next' do # (receive post-edit, update models), send next translation if NOMT $lock = false logmsg :server, "no mt" - return "#{$db['progress']}\t#{source}\t \t#{raw_source}" # return + obj = Hash.new + obj["progress"] = $db["progress"] + obj["source"] = source + obj["raw_source"] = raw_source + return obj.to_json # return end # 1. generate grammar for current sentence grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" @@ -254,8 +259,11 @@ get '/next' do # (receive post-edit, update models), send next translation } oovs.uniq! logmsg :server, "OOVs: #{oovs.to_s}" - if oovs.size > 0 - $last_reply = "OOV\t#{$db['progress']}\t#{oovs.map{|i| "\"#{i}\""}.join("\t")}" + if oovs.size > 0 # OOVs FIXME + obj = Hash.new + obj["oovs"] = oovs + obj["progress"] = $db['progress'] + $last_reply = obj.to_json logmsg :server, "OOV reply: '#{$last_reply}'" $lock = false $confirmed = false @@ -263,20 +271,35 @@ get '/next' do # (receive post-edit, update models), send next translation end # 3. translation msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>" - transl = send_recv :dtrain, msg - $db['mt_raw'] << transl + obj_str = proc_deriv(send_recv(:dtrain, msg)) + obj = JSON.parse obj_str + obj["transl"] = obj["target_groups"].join " " # 4. detokenizer - transl = send_recv :detokenizer, transl - $db['mt'] << transl + obj["transl_detok"] = send_recv(:detokenizer, obj["transl"]).strip + obj["target_groups"].each_index { |j| + prev = obj["target_groups"][j][0] + obj["target_groups"][j] = send_recv(:detokenizer, obj["target_groups"][j]).strip + obj["target_groups"][j][0]=prev if j > 0 + } + obj["source"] = source + obj["progress"]= $db['progress'] + obj["raw_source"] = raw_source + w_idx = 0 + obj["source_groups"].each_index { |j| + a = obj["source_groups"][j].split + a.each_with_index + } + # save + # FIXME # 5. reply - $last_reply = "#{$db['progress']}\t#{source}\t#{transl.strip}\t#{raw_source}" + $last_reply = obj.to_json $lock = false $confirmed = false logmsg :server, "response: '#{$last_reply}'" return $last_reply # return end - return "oh oh" # return FIXME: do something sensible + return "{}" # return [ERROR] end get '/debug' do # debug view |