diff options
-rw-r--r-- | index.php | 3 | ||||
-rw-r--r-- | interface.php | 42 | ||||
-rw-r--r-- | lfpe.css | 25 | ||||
-rw-r--r-- | lfpe.js | 187 | ||||
-rwxr-xr-x | server.rb | 115 | ||||
-rw-r--r-- | views/debug.haml | 1 |
6 files changed, 264 insertions, 109 deletions
@@ -19,6 +19,8 @@ <p>Beta test: <select class="small"> <option value="beta_test_A" onclick="document.getElementById('key').value=this.value;">A</option> + <option value="beta_test_A_nolearn" onclick="document.getElementById('key').value=this.value;">A (no learning)</option> + <option value="beta_test_A_nomt" onclick="document.getElementById('key').value=this.value;">A (no MT)</option> <option value="beta_test_A_sparse" onclick="document.getElementById('key').value=this.value;">A (sparse)</option> <option value="beta_test_B" onclick="document.getElementById('key').value=this.value;">B</option> <option value="beta_test_B_sparse" onclick="document.getElementById('key').value=this.value;">B (sparse)</option> @@ -33,7 +35,6 @@ <?php include("footer.php"); ?> - </body> </html> diff --git a/interface.php b/interface.php index 30a44b1..55b94ad 100644 --- a/interface.php +++ b/interface.php @@ -1,7 +1,7 @@ <html> <head> <meta charset="utf-8" /> - <title>Post-editing application (key: <?php echo $_GET["key"]; ?></title> + <title>Post-editing application (Session: #<?php echo $_GET["key"]; ?>)</title> <script src="lfpe.js"></script> <link rel="stylesheet" type="text/css" href="lfpe.css" /> </head> @@ -23,45 +23,51 @@ </table> <!-- /Source and target --> -<!-- Next button --> +<!-- Buttons --> <div> <button id="pause_button" type="button" onclick="pause()">Pause</button> <button id="next" type="button" onclick="Next()">Start/Continue</button> <span id="status"><strong>Working</strong> <img src="img/ajax-loader-large.gif" width="20px" /></span> </div> -<!-- /Next button --> +<!-- /Buttons --> -<!-- Document overview --> +<!-- Session overview --> <div id="overview_wrapper"> -<strong>Document overview</strong> +<strong>Session overview</strong> <table id="overview"> <?php $SESSION_DIR="/fast_scratch/simianer/lfpe/sessions"; -$j = file_get_contents($SESSION_DIR."/".$_GET["key"]."/data.json"); -$a = json_decode($j); +$json = file_get_contents($SESSION_DIR."/".$_GET["key"]."/data.json"); +$db = json_decode($json); + +$class = ""; $i = 0; -foreach($a->raw_source_segments as $s) { - if ($i <= $a->progress) { - echo "<tr id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'>".$a->post_edits_raw[$i]."</td></tr>"; +foreach($db->raw_source_segments as $s) { + if (in_array($i, $db->docs)) { + $class = "doc_title"; } else { - echo "<tr id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'></td></tr>"; + $class = ""; + } + $translation = ""; + if ($i <= $db->progress) { + $translation = $db->post_edits_raw[$i]; } + echo "<tr class='".$class."' id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'>".$translation."</td></tr>"; $i += 1; } ?> </table> </div> -<!-- /Document overview --> +<!-- /Session overview --> <!-- Help --> <div id="help"> <strong>Help</strong><br /> <p>Press the 'Next' to submit your post-edit and to request the next segment to translate -(or just press enter when the 'Target' textarea is in focus). You can stop your session at any time and continue it later; The 'Pause' -button has currently no function. Please only use <em>one</em> browser window at once.<br/> +(or just press enter when the 'Target' text area is in focus). You can stop your session at any time and continue it later; However, if you have to pause your session, wait until the activity notification disappears and then press 'Pause'. Alternatively, reload the site. Please only use <em>one</em> browser window at once.<br/> The interface was tested with Firefox 31.</p> -<p class="xtrasmall">Support: <a href="mailto://simianer ät cl.uni-heidelberg.de">Mail</a></p> -<p class="xtrasmall">Session: #<?php echo $_GET["key"]; ?> | <a href="http://coltrane.cl.uni-heidelberg.de:<?php echo $a->port; ?>/debug" target="_blank">Debug</a></p> +<p class="xtrasmall">Support: <a href="mailto://simianer@cl.uni-heidelberg.de">Mail</a></p> +<p class="xtrasmall">Session: #<?php echo $_GET["key"]; ?> | <a href="http://coltrane.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a></p> </div> <!-- /Help --> @@ -75,6 +81,8 @@ The interface was tested with Firefox 31.</p> <textarea style="display:none" id="source"></textarea> <textarea style="display:none" id="current_seg_id">0</textarea> <textarea style="display:none" id="paused">0</textarea> -<textarea style="display:none" id="port"><?php echo $a->port; ?></textarea> +<textarea style="display:none" id="oov_correct">0</textarea> +<textarea style="display:none" id="displayed_oov_hint">0</textarea> +<textarea style="display:none" id="port"><?php echo $db->port; ?></textarea> <!-- /Data --> @@ -30,21 +30,13 @@ span#status { float: right } -/* Document overview */ +/* Session overview */ div#overview_wrapper { margin-top:1em } table#overview { font-size:.8em } table#overview td.seg_text { width: 45% } table#overview td { border-bottom: 1px solid #000 } -/* /Document overview */ - -p#footer { - text-align: right; - font-size: .5em; - font-weight: bold; - margin: 0; - padding: 0; - color: #303030 -} +table#overview .doc_title { background-color: #eee } +/* /Session overview */ /* Header */ div#header { @@ -59,6 +51,17 @@ img#cl { } /* /Header */ +/* Footer */ +p#footer { + text-align: right; + font-size: .5em; + font-weight: bold; + margin: 0; + padding: 0; + color: #303030 +} +/* /Footer */ + /* Help */ div#help { margin-top: 2em; @@ -1,15 +1,56 @@ +/* + * Timer + * + */ +var Timer = { + start_t: 0, + pause_start_t: 0, + pause_acc_t: 0, + paused: false, + + start: function() { + this.start_t = Date.now(); + this.pause_start_t = 0; + this.pause_acc_t = 0; + this.paused = false; + }, + pause: function() { + this.paused = true; + this.pause_start_t = Date.now(); + }, + unpause: function() { + this.paused = false; + this.pause_acc_t += Date.now()-this.pause_start_t; + this.pause_start_t = 0; + }, + get: function() { + return (Date.now()-this.start_t)-this.pause_acc_t; + } +} + +/* + * init site + * + */ function init() { - document.getElementById("target_textarea").value = ""; + document.getElementById("target_textarea").value = ""; document.getElementById("raw_source_textarea").value = ""; - document.getElementById("source").value = ""; - document.getElementById("current_seg_id").value = ""; - document.getElementById("paused").value = ""; - document.getElementById("next").removeAttribute("disabled"); + document.getElementById("source").value = ""; + document.getElementById("current_seg_id").value = ""; + document.getElementById("paused").value = ""; + document.getElementById("oov_correct").value = false; + document.getElementById("displayed_oov_hint").value = false; + document.getElementById("next").removeAttribute("disabled"); + document.getElementById("pause_button").removeAttribute("disabled"); return false; } +/* + * cross-site request + * + */ function CreateCORSRequest(method, url) { var xhr = new XMLHttpRequest(); @@ -22,6 +63,10 @@ function CreateCORSRequest(method, url) return xhr; } +/* + * no newline on return in textarea + * + */ function catch_return(e) { if (e.keyCode == 13) { @@ -32,52 +77,68 @@ function catch_return(e) return false; } +/* + * pause/unpause timer + * + */ function pause() { - var paused = document.getElementById("paused"); - var button = document.getElementById("pause_button"); + var paused = document.getElementById("paused"); + var button = document.getElementById("pause_button"); var next_button = document.getElementById("next"); if (paused.value == 0) { button.innerHTML = "Unpause"; paused.value = 1; next.setAttribute("disabled", "disabled"); + Timer.pause(); } else { button.innerHTML = "Pause"; paused.value = 0; next.removeAttribute("disabled"); + Timer.unpause(); } } +/* + * next button + * + */ function Next() { // elements - var button = document.getElementById("next"); - var target_textarea = document.getElementById("target_textarea") + var button = document.getElementById("next"); + var pause_button = document.getElementById("pause_button"); + var target_textarea = document.getElementById("target_textarea") var raw_source_textarea = document.getElementById("raw_source_textarea"); - var current_seg_id = document.getElementById("current_seg_id"); - var source = document.getElementById("source"); - var status = document.getElementById("status"); + var current_seg_id = document.getElementById("current_seg_id"); + var source = document.getElementById("source"); + var status = document.getElementById("status"); + var oov_correct = document.getElementById("oov_correct"); // disable button and textarea - button.setAttribute("disabled", "disabled"); + button.setAttribute("disabled", "disabled"); + pause_button.setAttribute("disabled", "disabled"); target_textarea.setAttribute("disabled", "disabled"); - var port = document.getElementById("port").value; + // get metadata stored in DOM + var port = document.getElementById("port").value; var base_url = "http://coltrane.cl.uni-heidelberg.de:"+port; + var key = document.getElementById("key").value; - var key = document.getElementById("key").value; next_url = base_url+"/next?key="+key; + var post_edit = target_textarea.value; - if (post_edit != "") { + if (oov_correct.value=="false" && post_edit != "") { // compose request - next_url += "&example="+source.value+" %7C%7C%7C "+post_edit; + next_url += "&example="+source.value+" %7C%7C%7C "+post_edit+"&duration="+Timer.get(); // update document overview document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit; + } else if (oov_correct.value=="true" && post_edit != "") { + next_url += "&correct="+raw_source_textarea.value+" %7C%7C%7C "+post_edit } else { if (source.value != "") { - alert("Error: 1"); + alert("Error: 1"); // FIXME: do something reasonable } - // FIXME: do something reasonable } // show 'working' message @@ -86,64 +147,98 @@ function Next() // build request var xhr = CreateCORSRequest('get', next_url); if (!xhr) { - alert("Error: 2"); - // FIXME: do something reasonable + alert("Error: 2"); // FIXME: do something reasonable } // 'next' request's callbacks xhr.onload = function() { - /* - * translation system is currently handling another request - * FIXME: maybe poll server for result? - * - */ + // translation system is currently handling a request + // FIXME: maybe poll server for result? if (xhr.responseText == "locked") { alert("Translation system is locked, try again in a moment (reload page and click 'Start/Continue' again)."); - document.getElementById("status").style.display = "none"; + status.style.display = "none"; return; } - // got response: seg id\tsource\ttranslation\traw source - // 0 1 2 3 var x = xhr.responseText.split("\t"); - if (x == "fi") { // done, hide/disable functional elements + if (x == "fi") { // done -> hide/disable functional elements raw_source_textarea.style.display = "none"; - target_textarea.style.display = "none"; - status.style.display = "none"; - button.innerHTML = "Session finished, thank you!"; - button.setAttribute("disabled", "disabled"); - document.getElementById("pause_button").setAttribute("disabled", "disabled"); + target_textarea.style.display = "none"; + status.style.display = "none"; + button.innerHTML = "Session finished, thank you!"; + button.setAttribute("disabled", "disabled"); + pause_button.setAttribute("disabled", "disabled"); document.getElementById("seg_"+current_seg_id.value).className = ""; } else { - var id = x[0]; - var src = x[1]; + // got response: OOV\tseg id\ttoken_1\ttoken_2\t... + // 0 1 2 3 ... + if (x[0] == "OOV") { + var s = ""; + for (var i=2; i < x.length; i++) { + s += x[i].substr(1,x[i].length-2); + if (i+1 < x.length) { + s += "; "; + } + raw_source_textarea.value = s; + } + // update interface + status.style.display = "none"; + button.innerHTML = "Correct"; + button.removeAttribute("disabled"); + target_textarea.removeAttribute("disabled", "disabled"); + pause_button.removeAttribute("disabled", "disabled"); + target_textarea.value = ""; + target_textarea.focus(); + target_textarea.selectionStart = 0; + target_textarea.selectionEnd = 0; + oov_correct.value = true; + var id = x[1]; + document.getElementById("seg_"+id).className = "bold"; + if (id > 0) { + document.getElementById("seg_"+(id-1)).className = ""; + } + if (document.getElementById("displayed_oov_hint").value == "false") { + alert("Please translate the following words (separated by semicolons) to enable translation of the next sentence. Use proper casing."); + document.getElementById("displayed_oov_hint").value = true; + } + + return; + } + // got response: seg id\tsource\ttranslation\traw source + // 0 1 2 3 + var id = x[0]; + var src = x[1]; var translation = x[2]; - var raw_source = x[3]; + var raw_source = x[3]; // update interface - status.style.display = "none"; - target_textarea.value = translation; + oov_correct.value = false; + status.style.display = "none"; + target_textarea.value = translation; raw_source_textarea.value = raw_source; - button.innerHTML = "Next"; - button.removeAttribute("disabled"); + button.innerHTML = "Next"; + button.removeAttribute("disabled"); target_textarea.removeAttribute("disabled", "disabled"); + pause_button.removeAttribute("disabled", "disabled"); document.getElementById("seg_"+id).className = "bold"; if (x[0] > 0) { document.getElementById("seg_"+(id-1)).className = ""; } - target_textarea.rows = Math.round(translation.length/80)+1; + target_textarea.rows = Math.round(translation.length/80)+1; raw_source_textarea.rows = Math.round(raw_source.length/80)+1; target_textarea.focus(); target_textarea.selectionStart = 0; - target_textarea.selectionEnd = 0; + target_textarea.selectionEnd = 0; // remember aux data in DOM current_seg_id.value = id; - source.value = src; + source.value = src; // confirm to server var xhr_confirm = CreateCORSRequest('get', base_url+"/confirm"); xhr_confirm.send(); // FIXME: handle errors + + Timer.start(); } }; @@ -152,5 +247,7 @@ function Next() }; xhr.send(); // send 'next' request + + return; } @@ -15,6 +15,7 @@ require_relative "#{ARGV[0]}" # load configuration for this session $lock = false # lock if currently learning/translating $last_reply = nil # cache last reply $confirmed = true # client received translation? +$additional_rules = [] if !FileTest.exist? LOCK_FILE # locked? $db = {} # FIXME: that is supposed to be a database connection $env = {} # environment variables (socket connections to daemons) @@ -131,6 +132,18 @@ get '/next' do # (receive post-edit, update models), send next translation $lock = true key = params[:key] # FIXME: do something with it, e.g. simple auth + if params[:correct] + logmsg :server, "correct: #{params[:correct]}" + grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" + src, tgt = splitpipe(params[:correct]) + src = src.split(';').map { |i| i.strip } + tgt = tgt.split(';').map { |i| i.strip } + src.each_with_index { |s,i| + rule = "[X] ||| #{s} ||| #{tgt[i]} ||| ForceRule=1 ||| 0-0" + $additional_rules << rule + } + $confirmed = true + end # received post-edit -> update models # 0. save raw post-edit # 1. tokenize @@ -147,6 +160,7 @@ get '/next' do # (receive post-edit, update models), send next translation # 0. save raw post-edit source, reference = params[:example].strip.split(" ||| ") $db['post_edits_raw'] << reference.strip + $db['durations'] << params['duration'].to_i # 1. tokenize reference = send_recv :tokenizer, reference # 2. truecase @@ -154,6 +168,7 @@ get '/next' do # (receive post-edit, update models), send next translation # 3. save processed post-edits logmsg "db", "saving processed post-edit" $db['post_edits'] << reference.strip + if !NOLEARN && !NOMT # 4. update weights grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>" @@ -169,12 +184,13 @@ get '/next' do # (receive post-edit, update models), send next translation send_recv :extractor, "default_context ||| #{source} ||| #{reference} ||| #{a}" # 6. update database logmsg "db", "updating database" + end update_database end source = $db['source_segments'][$db['progress']] raw_source = $db['raw_source_segments'][$db['progress']] if !source # input is done -> displays 'Thank you!' - logmsg "server", "end of input, sending 'fi'" + logmsg :server, "end of input, sending 'fi'" $lock = false return "fi" # return elsif !$confirmed @@ -183,20 +199,55 @@ get '/next' do # (receive post-edit, update models), send next translation return $last_reply # return else # translate next sentence + # 0. no mt? # 1. generate grammar - # 2. translate - # 3. detokenize - # 4. reply + # 2. check for OOV + # 3. translate + # 4. detokenize + # 5. reply source.strip! + # 0. no mt? + if NOMT + $lock = false + logmsg :server, "no mt" + return "#{$db['progress']}\t#{source}\t \t#{raw_source}" # return + end # 1. generate grammar for current sentence grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" send_recv :extractor, "default_context ||| #{source} ||| #{grammar}" - # 2. translation + # - additional rules + $additional_rules.each { |rule| + logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'" + `echo "#{rule}" >> #{grammar}` + } + # 2. check for OOV + src_r = ReadFile.readlines(grammar).map { + |l| splitpipe(l)[1].strip.split + }.flatten.uniq + oovs = [] + source.split.each { |token| + if !src_r.include? token + oovs << token + logmsg :server, "OOV token: '#{token}'" + end + } + oovs.uniq! + logmsg :server, "OOVs: #{oovs.to_s}" + if oovs.size > 0 + $last_reply = "OOV\t#{$db['progress']}\t#{oovs.map{|i| "\"#{i}\""}.join("\t")}" + logmsg :server, "OOV reply: '#{$last_reply}'" + $lock = false + $confirmed = false + return $last_reply # return + end + # 3. translation msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>" transl = send_recv :dtrain, msg - # 3. detokenizer + $db['mt_raw'] << transl + # 4. detokenizer transl = send_recv :detokenizer, transl - # 4. reply + $db['mt'] << transl + # 5. reply $last_reply = "#{$db['progress']}\t#{source}\t#{transl.strip}\t#{raw_source}" $lock = false $confirmed = false @@ -222,25 +273,6 @@ get '/confirm' do # client confirms received translation return "#{$confirmed}" end -get '/shutdown' do # stop daemons and shut down server - logmsg :server, "shutting down daemons" - stop_all_daemons - - return "stopped all daemons, ready to shutdown" -end - -get '/reset' do # reset current session - return "locked" if $lock - $db = JSON.parse ReadFile.read DB_FILE # FIXME: proper database - $db['post_edits'].clear - $db['post_edits_raw'].clear - update_database - $db['progress'] = 0 - $confirmed = true - - return "#{$db.to_s}" -end - get '/set_learning_rate/:rate' do logmsg :server, "set learning rate, #{params[:rate]}" return "locked" if $lock @@ -257,6 +289,18 @@ get '/set_sparse_learning_rate/:rate' do return "done" end +get '/reset' do # reset current session + return "locked" if $lock + $db = JSON.parse ReadFile.read DB_FILE # FIXME: proper database + $db['post_edits'].clear + $db['post_edits_raw'].clear + update_database + $db['progress'] = 0 + $confirmed = true + + return "#{$db.to_s}" +end + get '/reset_weights' do logmsg :server, "reset weights" return "locked" if $lock @@ -273,15 +317,16 @@ get '/reset_extractor' do return "done" end -get '/load/:name' do # load other db file than configured - return "locked" if $lock - $db = JSON.parse ReadFile.read "#{SESSION_DIR}/#{params[:name]}.json.original" - $db['post_edits'].clear - $db['post_edits_raw'].clear - update_database - $db['progress'] = 0 - $confirmed = true +get '/reset_add_rules' do + $additional_rules.clear + + return "done" +end - "#{$db.to_s}" +get '/shutdown' do # stop daemons and shut down server + logmsg :server, "shutting down daemons" + stop_all_daemons + + return "stopped all daemons, ready to shutdown" end diff --git a/views/debug.haml b/views/debug.haml index 45cb598..bd2076f 100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -11,6 +11,7 @@ %a{:href => "/reset", :target => "_blank"} reset progress, %a{:href => "/reset_weights", :target => "_blank"} reset weights, %a{:href => "/reset_extractor", :target => "_blank"} reset extractor, + %a{:href => "/reset_add_rules", :target => "_blank"} reset add. rules, %a{:href => "/shutdown", :target => "_blank"} shutdown, %span learning rate %select |