diff options
-rw-r--r-- | index.php | 68 | ||||
-rw-r--r-- | lfpe.css | 74 | ||||
-rw-r--r-- | lfpe.js | 146 | ||||
-rwxr-xr-x | run_server | 2 | ||||
-rwxr-xr-x | server.rb | 187 |
5 files changed, 342 insertions, 135 deletions
@@ -1,54 +1,88 @@ <html> <head> <meta charset="utf-8" /> - <title>Post-editing application</title> + <title>Post-editing application (key: <?php echo $_GET["key"]; ?></title> <script src="lfpe.js"></script> <link rel="stylesheet" type="text/css" href="lfpe.css" /> </head> -<body onload="Next()"> +<body onload=""> +<!-- Wrapper --> <div id="wrapper"> +<!-- Header --> <div id="header"> -<img src="img/logo_neu_204x107.jpg" /> -<img id="cl" src="img/institut_cl.png" /> + <img id="uni" src="img/logo_neu_204x107.jpg" /> + <img id="cl" src="img/institut_cl.png" /> </div> +<!-- /Header --> +<!-- Source and target --> <table> <tr> <td align="right">Source:</td> - <td><textarea id="src" name="source" cols="80" rows="1" readonly></textarea></td> + <td><textarea id="raw_source_textarea" name="source" cols="80" rows="1" disabled></textarea></td> </tr> <tr> <td align="right">Target:</td> - <td><textarea id="trgt" name="target" cols="80" rows="1" onkeypress="submit(event)"></textarea></td> + <td><textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="catch_return(event)"></textarea></td> </tr> </table> +<!-- /Source and target --> -<p> - <button id="next" type="button" onclick="Next()">Next</button> -</p> +<!-- Next button --> +<div> + <button id="pause_button" type="button" onclick="pause()">Pause</button> + <button id="next" type="button" onclick="Next()">Start/Continue</button> + <span id="status"><strong>Working</strong> <img src="img/ajax-loader-large.gif" width="20px" /></span> +</div> +<!-- /Next button --> + +<!-- Document overview --> +<div> +<strong>Document overview</strong> +<table id="overview"> +<?php +$j = file_get_contents("/fast_scratch/simianer/lfpe/example_session/".$_GET["key"].".json"); # FIXME: from database +$a = json_decode($j); +$i = 0; +foreach($a->raw_source_segments as $s) { + if ($i <= $a->progress) { + echo "<tr id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'>".$a->post_edits_raw[$i]."</td></tr>"; + } else { + echo "<tr id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'></td></tr>"; + } + $i += 1; +} +?> +</table> +</div> +<!-- /Document overview --> -<p id="desc"> +<!-- Help --> +<p id="help"> <strong>Help</strong><br /> Press the 'Next' to submit your post-edit and to request the next segment to translate (or just press enter when the 'Target' textarea is in focus). </p> +<!-- /Help --> +<!-- Footer --> <p id="footer"> ©2015 Heidelberg University/Institute for Computational Linguistics </p> +<!-- /Footer --> </div> +<!-- /Wrapper --> - -<p id="translating_status"> - <strong>Translating</strong> <img src="img/ajax-loader-large.gif" width="20px" /> -</p> - - -<textarea style="display:none" id="src_pp"></textarea> +<!-- Data --> +<textarea style="display:none" id="key"><?php echo $_GET['key']; ?></textarea> +<textarea style="display:none" id="source"></textarea> +<textarea style="display:none" id="current_seg_id">0</textarea> +<textarea style="display:none" id="paused">0</textarea> +<!-- /Data --> </body> </html> @@ -1,57 +1,65 @@ -textarea { - font-size: 20px +html { + font-family: Arial, Helvetica, sans-serif; + font-size: 18px } -p#translating_status { - display: none; - margin-left: 4.2em; - /*margin-top: -5.5em*/ +textarea { + font-size: 20px; + width: 100% } button { - margin: 1em; - padding: .25em; + margin: 1em; + padding: .25em; background: #fff; font-weight: bold } -p#fi { - display: none; -} +.bold { font-weight:bold } div#wrapper { - margin: 2em; + margin: 2em; padding: 1em; - border: 1px dashed #000 -} - -div#header { - margin-bottom: 2em; + border: 1px dashed #000 } -p#footer { - text-align:right; - font-size: 0.5em; - font-weight:bold; - margin:0; - padding:0; - color:#ccc +span#status { + display: none; + float: right } -p#desc { - font-size:0.8em; - width:40%; - color: #ccc; - text-align:justify -} +/* Document overview */ +table#overview { font-size:80% } +table#overview td.seg_text { width: 45% } +table#overview td { border-bottom: 1px solid #000 } +/* /Document overview */ -p#desc:hover { - color: #000 +p#footer { + text-align: right; + font-size: .5em; + font-weight: bold; + margin: 0; + padding: 0; + color: #303030 } +/* Header */ +div#header { margin-bottom: 2em } +img#uni {} img#cl { margin-bottom:20px; margin-left:10px; - vertical-align:bottom; + vertical-align:bottom +} +/* /Header */ + +/* Help */ +p#help { + font-size: .8em; + width: 40%; + color: #ccc; + text-align: justify } +p#help:hover { color: #000 } +/* /Help */ @@ -1,20 +1,17 @@ -function CreateCORSRequest(method, url) { +function CreateCORSRequest(method, url) +{ var xhr = new XMLHttpRequest(); if ("withCredentials" in xhr) { - // XHR for Chrome/Firefox/Opera/Safari. xhr.open(method, url, true); - } else if (typeof XDomainRequest != "undefined") { - // XDomainRequest for IE. - xhr = new XDomainRequest(); - xhr.open(method, url); } else { - // CORS not supported. xhr = null; } + return xhr; } -function submit(e) { +function catch_return(e) +{ if (e.keyCode == 13) { e.preventDefault(); Next(); @@ -23,49 +20,124 @@ function submit(e) { return false; } +function pause() +{ + var paused = document.getElementById("paused"); + var button = document.getElementById("pause_button"); + var next_button = document.getElementById("next"); + if (paused.value == 0) { + button.innerHTML = "Unpause"; + paused.value = 1; + next.setAttribute("disabled", "disabled"); + } else { + button.innerHTML = "Pause"; + paused.value = 0; + next.removeAttribute("disabled"); + } +} + function Next() { - url = "http://coltrane.cl.uni-heidelberg.de:60666/next"; - var pe = document.getElementById("trgt").value; - if (pe != "") { - var src = document.getElementById("src_pp").value; - url += "?example="+src+" %7C%7C%7C "+pe; + // elements + var button = document.getElementById("next"); + var target_textarea = document.getElementById("target_textarea") + var raw_source_textarea = document.getElementById("raw_source_textarea"); + var current_seg_id = document.getElementById("current_seg_id"); + var source = document.getElementById("source"); + var status = document.getElementById("status"); + + // disable button and textarea + button.setAttribute("disabled", "disabled"); + target_textarea.setAttribute("disabled", "disabled"); + + var base_url = "http://coltrane.cl.uni-heidelberg.de:60666"; // FIXME: variable + + var key = document.getElementById("key").value; + next_url = base_url+"/next?key="+key; + var post_edit = target_textarea.value; + if (post_edit != "") { + // compose request + next_url += "&example="+source.value+" %7C%7C%7C "+post_edit; + // update document overview + document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit; + } else { + if (source.value != "") { + alert("Error: 1"); + } + // FIXME: do something reasonable } - document.getElementById("translating_status").style.display = "block"; - var xhr = CreateCORSRequest('get', url); + + // show 'working' message + status.style.display = "block"; + + // build request + var xhr = CreateCORSRequest('get', next_url); if (!xhr) { - alert('CORS not supported'); - return; + alert("Error: 2"); + // FIXME: do something reasonable } + // 'next' request's callbacks xhr.onload = function() { + /* + * translation system is currently handling another request + * FIXME: maybe poll server for result? + * + */ + if (xhr.responseText == "locked") { + alert("Translation system is locked, try again in a moment (reload page and click 'Start/Continue' again)."); + document.getElementById("status").style.display = "none"; + + return; + } + // got response: seg id\tsource\ttranslation\traw source + // 0 1 2 3 var x = xhr.responseText.split("\t"); - if (x == "fi") { - document.getElementById("src").style.display = "none"; - document.getElementById("trgt").style.display = "none"; - document.getElementById("translating_status").style.display = "none"; - document.getElementById("next").innerHTML = "Thank you!"; - document.getElementById("next").disabled = true; + if (x == "fi") { // done, hide/disable functional elements + raw_source_textarea.style.display = "none"; + target_textarea.style.display = "none"; + status.style.display = "none"; + button.innerHTML = "Session finished, thank you!"; + button.setAttribute("disabled", "disabled"); + document.getElementById("pause_button").setAttribute("disabled", "disabled"); + document.getElementById("seg_"+current_seg_id.value).className = ""; } else { - document.getElementById("src_pp").value = x[0]; - document.getElementById("src").value = x[2]; - document.getElementById("src").rows = Math.round(x[2].length/80)+1; - var firstLetter = x[1][0].toUpperCase(); - var rest = x[1].substring(1); - var t = firstLetter + rest; - document.getElementById("trgt").value = t; - document.getElementById("trgt").rows = Math.round(x[1].length/80)+1; - document.getElementById("translating_status").style.display = "none"; - document.getElementById("trgt").focus(); - document.getElementById("trgt").selectionStart = 0; - document.getElementById("trgt").selectionEnd = 0; + var id = x[0]; + var src = x[1]; + var translation = x[2]; + var raw_source = x[3]; + + // update interface + status.style.display = "none"; + target_textarea.value = translation; + raw_source_textarea.value = raw_source; + button.innerHTML = "Next"; + button.removeAttribute("disabled"); + target_textarea.removeAttribute("disabled", "disabled"); + document.getElementById("seg_"+id).className = "bold"; + if (x[0] > 0) { + document.getElementById("seg_"+(id-1)).className = ""; + } + target_textarea.rows = Math.round(translation.length/80)+1; + raw_source_textarea.rows = Math.round(raw_source.length/80)+1; + target_textarea.focus(); + target_textarea.selectionStart = 0; + target_textarea.selectionEnd = 0; + + // remember aux data in DOM + current_seg_id.value = id; + source.value = src; + + // confirm to server + var xhr_confirm = CreateCORSRequest('get', "http://coltrane.cl.uni-heidelberg.de:60666/confirm"); + xhr_confirm.send(); // FIXME: handle errors } }; xhr.onerror = function() { - alert('Error'); + // FIXME: do something reasonable }; - xhr.send(); + xhr.send(); // send 'next' request } @@ -2,5 +2,5 @@ export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib export PYTHONPATH=~/.local/lib/python2.7/site-packages -ruby server.rb ../example/conf.rb &>server.rb.out +./kill;./kill;rm /tmp/server.lock; ./server.rb ../example_pattr/conf.rb @@ -2,14 +2,40 @@ require 'sinatra' require 'sinatra/cross_origin' +require "sinatra/reloader" require 'nanomsg' require 'zipf' require 'digest' +require 'json' +# load configuration file and setup global variables require_relative "#{ARGV[0]}" -INPUT = ReadFile.readlines INPUT_FILE -INPUT_RAW = ReadFile.readlines RAW_INPUT_FILE -`mkdir -p #{WORK_DIR}/g` +$lock = false # lock if currently learning/translating +$last_reply = nil # cache last reply +$confirmed = true # client received translation? +if !FileTest.exist? LOCK_FILE + $db = {} # FIXME: that is supposed to be a database connection + $env = {} +end + +$daemons = { + :detokenizer => "/fast_scratch/simianer/lfpe/lfpe/de-tok.rb -a D -S '__ADDR__' -p #{SCRIPTS_DIR} -l #{TARGET_LANG}", + :tokenizer => "/fast_scratch/simianer/lfpe/lfpe/de-tok.rb -a T -S '__ADDR__' -p #{SCRIPTS_DIR} -l #{TARGET_LANG}", + :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'", + :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'", + :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'", + :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'", + :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" +} + +# setup Sinatra +set :bind, SERVER_IP +set :port, WEB_PORT +set :allow_origin, :any +set :allow_methods, [:get, :post, :options] +set :allow_credentials, true +set :max_age, "1728000" +set :expose_headers, ['Content-Type'] def start_daemon cmd, name, addr STDERR.write "> starting #{name} daemon\n" @@ -24,112 +50,179 @@ def start_daemon cmd, name, addr return sock, pid end -def stop_all_daemons env +def stop_all_daemons STDERR.write "shutting down all daemons\n" - env.each { |name,p| + $env.each { |name,p| p[:socket].send "shutdown" STDERR.write "< #{name} is #{p[:socket].recv}\n" } end -daemons = { - :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'", - :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'", - :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'", - :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'", - :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" ##{DTRAIN_EXTRA}" -} +def update_database # FIXME: real database + $db['progress'] += 1 + j = JSON.generate $db + f = WriteFile.new DB_FILE + f.write j.to_s + f.close +end -env = {} -port = BEGIN_PORT_RANGE -daemons.each { |name,cmd| - sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}" - env[name] = { :socket => sock, :pid => pid } - port += 1 -} +def init + # database connection + $db = JSON.parse ReadFile.read DB_FILE + # working directory + `mkdir -p #{WORK_DIR}/g` + # setup environment, start daemons + port = BEGIN_PORT_RANGE + $daemons.each { |name,cmd| + sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}" + $env[name] = { :socket => sock, :pid => pid } + port += 1 + } + `touch #{LOCK_FILE}` +end -set :bind, SERVER_IP -set :port, WEB_PORT -set :allow_origin, :any -set :allow_methods, [:get, :post, :options] -set :allow_credentials, true -set :max_age, "1728000" -set :expose_headers, ['Content-Type'] +init if !FileTest.exist?(LOCK_FILE) get '/' do cross_origin "Nothing to see here." end +# receive post-edit, send translation get '/next' do cross_origin + return "locked" if $lock + $lock = true + key = params[:key] # FIXME: do something with it if params[:example] source, reference = params[:example].strip.split(" ||| ") + # tokenize, lowercase + $db['post_edits_raw'] << reference.strip + $env[:tokenizer][:socket].send reference + STDERR.write "[tokenizer] waiting ...\n" + reference = $env[:tokenizer][:socket].recv.force_encoding("UTF-8").strip + STDERR.write "[tokenizer] < received tokenized reference: '#{reference}'\n" + reference.downcase! + # save post-edits + $db['post_edits'] << reference.strip # update weights grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>" msg = "#{annotated_source} ||| #{reference}" STDERR.write "[dtrain] > sending '#{msg}' for update\n" - env[:dtrain][:socket].send msg + $env[:dtrain][:socket].send msg STDERR.write "[dtrain] waiting for confirmation ...\n" - STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n" + STDERR.write "[dtrain] < says it's #{$env[:dtrain][:socket].recv}\n" # update grammar extractor # get forward alignment msg = "#{source} ||| #{reference}" STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n" - env[:aligner_fwd][:socket].send msg + $env[:aligner_fwd][:socket].send msg STDERR.write "[aligner_fwd] waiting for alignment ...\n" - a_fwd = env[:aligner_fwd][:socket].recv.strip + a_fwd = $env[:aligner_fwd][:socket].recv.strip STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n" # get backward alignment msg = "#{source} ||| #{reference}" STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n" - env[:aligner_back][:socket].send msg + $env[:aligner_back][:socket].send msg STDERR.write "[aligner_back] waiting for alignment ...\n" - a_back = env[:aligner_back][:socket].recv.strip + a_back = $env[:aligner_back][:socket].recv.strip STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n" - # combine alignments + # symmetrize alignment msg = "#{a_fwd} ||| #{a_back}" STDERR.write "[atools] > sending '#{msg}' to combine alignments\n" - env[:atools][:socket].send msg + $env[:atools][:socket].send msg STDERR.write "[atools] waiting for alignment ...\n" - a = env[:atools][:socket].recv.strip + a = $env[:atools][:socket].recv.strip STDERR.write "[atools] < got alignment '#{a}'\n" # actual extractor msg = "TEST ||| #{source} ||| #{reference} ||| #{a}" STDERR.write "[extractor] > sending '#{msg}' for learning\n" - env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}" + $env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}" STDERR.write "[extractor] waiting for confirmation ...\n" - STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n" + STDERR.write "[extractor] < got '#{$env[:extractor][:socket].recv}'\n" + update_database end - source = INPUT.shift - raw_source = INPUT_RAW.shift + source = $db['source_segments'][$db['progress']] + raw_source = $db['raw_source_segments'][$db['progress']] if !source # input is done -> displays 'Thank you!' STDERR.write ">>> end of input, sending 'fi'\n" - "fi" + $lock = false + return "fi" + elsif !$confirmed + $lock = false + return $last_reply else # translate next sentence source.strip! # generate grammar for current sentence grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars? msg = "- ||| #{source} ||| #{grammar}" # FIXME: content identifier useful? STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n" - env[:extractor][:socket].send msg + $env[:extractor][:socket].send msg STDERR.write "[extractor] waiting for confirmation ...\n" - STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n" + STDERR.write "[extractor] < says it generated #{$env[:extractor][:socket].recv.force_encoding("UTF-8").strip}\n" # translation msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>" STDERR.write "[dtrain] > asking to translate: '#{msg}'\n" - env[:dtrain][:socket].send msg + $env[:dtrain][:socket].send msg STDERR.write "[dtrain] waiting for translation ...\n" - transl = env[:dtrain][:socket].recv.force_encoding "UTF-8" + transl = $env[:dtrain][:socket].recv.force_encoding "UTF-8" STDERR.write "[dtrain] < received translation: '#{transl}'\n" - "#{source}\t#{transl.strip}\t#{raw_source}" + # detokenizer + $env[:detokenizer][:socket].send transl + STDERR.write "[detokenizer] waiting ...\n" + transl = $env[:detokenizer][:socket].recv.force_encoding("UTF-8").strip + STDERR.write "[detokenizer] < received final translation: '#{transl}'\n" + # reply + $last_reply = "#{$db['progress']}\t#{source}\t#{transl.strip}\t#{raw_source}" + $lock = false + $confirmed = false + STDERR.write ">>> response: '#{$last_reply}'" + return $last_reply end + + return "oh oh" # FIXME: do something sensible +end + +# client confirms received translation +get '/confirm' do + cross_origin + STDERR.write "confirmed = #{$confirmed}\n" + $confirmed = true + + return "#{$confirmed}" end # stop daemons and shut down server get '/shutdown' do - stop_all_daemons env - exit + stop_all_daemons + + "ready to shutdown" +end + +# reset current session +get '/reset' do + return "locked" if $lock + $db = JSON.parse ReadFile.read DB_FILE # FIXME: database .. + $db['post_edits'].clear + $db['post_edits_raw'].clear + update_database + $db['progress'] = 0 + $confirmed = true + + return "#{$db.to_s}" +end + +# load other db file than configured +get '/load/:name' do + return "locked" if $lock + $db = JSON.parse ReadFile.read "/fast_scratch/simianer/lfpe/example_pattr/#{params[:name]}.json.original" + $db['post_edits'].clear + $db['post_edits_raw'].clear + update_database + $db['progress'] = 0 + $confirmed = true + + "#{$db.to_s}" end |