summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-07-09 22:24:22 +0200
committer Patrick Simianer <p@simianer.de> 2015-07-09 22:24:22 +0200
commit 7597c74ac4a890f6c3afcf48f1ec350be35793f2 (patch)
tree f3c7fd146e1f754bf40d0f3fb16fa65280edf82f
parent 18071552ee0c1f2efab2263e179c5c4dbdbbf6d5 (diff)
new features
-rw-r--r-- index.php 3
-rw-r--r-- interface.php 42
-rw-r--r-- lfpe.css 25
-rw-r--r-- lfpe.js 187
-rwxr-xr-x server.rb 115
-rw-r--r-- views/debug.haml 1
6 files changed, 264 insertions, 109 deletions
diff --git a/index.php b/index.php
index 477602f..12492a4 100644
--- a/index.php
+++ b/index.php
@@ -19,6 +19,8 @@
<p>Beta test:
<select class="small">
<option value="beta_test_A" onclick="document.getElementById('key').value=this.value;">A</option>
+ <option value="beta_test_A_nolearn" onclick="document.getElementById('key').value=this.value;">A (no learning)</option>
+ <option value="beta_test_A_nomt" onclick="document.getElementById('key').value=this.value;">A (no MT)</option>
<option value="beta_test_A_sparse" onclick="document.getElementById('key').value=this.value;">A (sparse)</option>
<option value="beta_test_B" onclick="document.getElementById('key').value=this.value;">B</option>
<option value="beta_test_B_sparse" onclick="document.getElementById('key').value=this.value;">B (sparse)</option>
@@ -33,7 +35,6 @@
<?php include("footer.php"); ?>
-
</body>
</html>
diff --git a/interface.php b/interface.php
index 30a44b1..55b94ad 100644
--- a/interface.php
+++ b/interface.php
@@ -1,7 +1,7 @@
<html>
<head>
<meta charset="utf-8" />
- <title>Post-editing application (key: <?php echo $_GET["key"]; ?></title>
+ <title>Post-editing application (Session: #<?php echo $_GET["key"]; ?>)</title>
<script src="lfpe.js"></script>
<link rel="stylesheet" type="text/css" href="lfpe.css" />
</head>
@@ -23,45 +23,51 @@
</table>
<!-- /Source and target -->
-<!-- Next button -->
+<!-- Buttons -->
<div>
<button id="pause_button" type="button" onclick="pause()">Pause</button>
<button id="next" type="button" onclick="Next()">Start/Continue</button>
<span id="status"><strong>Working</strong> <img src="img/ajax-loader-large.gif" width="20px" /></span>
</div>
-<!-- /Next button -->
+<!-- /Buttons -->
-<!-- Document overview -->
+<!-- Session overview -->
<div id="overview_wrapper">
-<strong>Document overview</strong>
+<strong>Session overview</strong>
<table id="overview">
<?php
$SESSION_DIR="/fast_scratch/simianer/lfpe/sessions";
-$j = file_get_contents($SESSION_DIR."/".$_GET["key"]."/data.json");
-$a = json_decode($j);
+$json = file_get_contents($SESSION_DIR."/".$_GET["key"]."/data.json");
+$db = json_decode($json);
+
+$class = "";
$i = 0;
-foreach($a->raw_source_segments as $s) {
- if ($i <= $a->progress) {
- echo "<tr id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'>".$a->post_edits_raw[$i]."</td></tr>";
+foreach($db->raw_source_segments as $s) {
+ if (in_array($i, $db->docs)) {
+ $class = "doc_title";
} else {
- echo "<tr id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'></td></tr>";
+ $class = "";
+ }
+ $translation = "";
+ if ($i <= $db->progress) {
+ $translation = $db->post_edits_raw[$i];
}
+ echo "<tr class='".$class."' id='seg_".$i."'><td>".($i+1).".</td><td>".$s."</td><td class='seg_text' id='seg_".$i."_t'>".$translation."</td></tr>";
$i += 1;
}
?>
</table>
</div>
-<!-- /Document overview -->
+<!-- /Session overview -->
<!-- Help -->
<div id="help">
<strong>Help</strong><br />
<p>Press the 'Next' to submit your post-edit and to request the next segment to translate
-(or just press enter when the 'Target' textarea is in focus). You can stop your session at any time and continue it later; The 'Pause'
-button has currently no function. Please only use <em>one</em> browser window at once.<br/>
+(or just press enter when the 'Target' text area is in focus). You can stop your session at any time and continue it later; However, if you have to pause your session, wait until the activity notification disappears and then press 'Pause'. Alternatively, reload the site. Please only use <em>one</em> browser window at once.<br/>
The interface was tested with Firefox 31.</p>
-<p class="xtrasmall">Support: <a href="mailto://simianer &auml;t cl.uni-heidelberg.de">Mail</a></p>
-<p class="xtrasmall">Session: #<?php echo $_GET["key"]; ?> | <a href="http://coltrane.cl.uni-heidelberg.de:<?php echo $a->port; ?>/debug" target="_blank">Debug</a></p>
+<p class="xtrasmall">Support: <a href="mailto://simianer@cl.uni-heidelberg.de">Mail</a></p>
+<p class="xtrasmall">Session: #<?php echo $_GET["key"]; ?> | <a href="http://coltrane.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a></p>
</div>
<!-- /Help -->
@@ -75,6 +81,8 @@ The interface was tested with Firefox 31.</p>
<textarea style="display:none" id="source"></textarea>
<textarea style="display:none" id="current_seg_id">0</textarea>
<textarea style="display:none" id="paused">0</textarea>
-<textarea style="display:none" id="port"><?php echo $a->port; ?></textarea>
+<textarea style="display:none" id="oov_correct">0</textarea>
+<textarea style="display:none" id="displayed_oov_hint">0</textarea>
+<textarea style="display:none" id="port"><?php echo $db->port; ?></textarea>
<!-- /Data -->
diff --git a/lfpe.css b/lfpe.css
index 3fc0a2c..4609715 100644
--- a/lfpe.css
+++ b/lfpe.css
@@ -30,21 +30,13 @@ span#status {
float: right
}
-/* Document overview */
+/* Session overview */
div#overview_wrapper { margin-top:1em }
table#overview { font-size:.8em }
table#overview td.seg_text { width: 45% }
table#overview td { border-bottom: 1px solid #000 }
-/* /Document overview */
-
-p#footer {
- text-align: right;
- font-size: .5em;
- font-weight: bold;
- margin: 0;
- padding: 0;
- color: #303030
-}
+table#overview .doc_title { background-color: #eee }
+/* /Session overview */
/* Header */
div#header {
@@ -59,6 +51,17 @@ img#cl {
}
/* /Header */
+/* Footer */
+p#footer {
+ text-align: right;
+ font-size: .5em;
+ font-weight: bold;
+ margin: 0;
+ padding: 0;
+ color: #303030
+}
+/* /Footer */
+
/* Help */
div#help {
margin-top: 2em;
diff --git a/lfpe.js b/lfpe.js
index 49db94d..7edb305 100644
--- a/lfpe.js
+++ b/lfpe.js
@@ -1,15 +1,56 @@
+/*
+ * Timer
+ *
+ */
+var Timer = {
+ start_t: 0,
+ pause_start_t: 0,
+ pause_acc_t: 0,
+ paused: false,
+
+ start: function() {
+ this.start_t = Date.now();
+ this.pause_start_t = 0;
+ this.pause_acc_t = 0;
+ this.paused = false;
+ },
+ pause: function() {
+ this.paused = true;
+ this.pause_start_t = Date.now();
+ },
+ unpause: function() {
+ this.paused = false;
+ this.pause_acc_t += Date.now()-this.pause_start_t;
+ this.pause_start_t = 0;
+ },
+ get: function() {
+ return (Date.now()-this.start_t)-this.pause_acc_t;
+ }
+}
+
+/*
+ * init site
+ *
+ */
function init()
{
- document.getElementById("target_textarea").value = "";
+ document.getElementById("target_textarea").value = "";
document.getElementById("raw_source_textarea").value = "";
- document.getElementById("source").value = "";
- document.getElementById("current_seg_id").value = "";
- document.getElementById("paused").value = "";
- document.getElementById("next").removeAttribute("disabled");
+ document.getElementById("source").value = "";
+ document.getElementById("current_seg_id").value = "";
+ document.getElementById("paused").value = "";
+ document.getElementById("oov_correct").value = false;
+ document.getElementById("displayed_oov_hint").value = false;
+ document.getElementById("next").removeAttribute("disabled");
+ document.getElementById("pause_button").removeAttribute("disabled");
return false;
}
+/*
+ * cross-site request
+ *
+ */
function CreateCORSRequest(method, url)
{
var xhr = new XMLHttpRequest();
@@ -22,6 +63,10 @@ function CreateCORSRequest(method, url)
return xhr;
}
+/*
+ * no newline on return in textarea
+ *
+ */
function catch_return(e)
{
if (e.keyCode == 13) {
@@ -32,52 +77,68 @@ function catch_return(e)
return false;
}
+/*
+ * pause/unpause timer
+ *
+ */
function pause()
{
- var paused = document.getElementById("paused");
- var button = document.getElementById("pause_button");
+ var paused = document.getElementById("paused");
+ var button = document.getElementById("pause_button");
var next_button = document.getElementById("next");
if (paused.value == 0) {
button.innerHTML = "Unpause";
paused.value = 1;
next.setAttribute("disabled", "disabled");
+ Timer.pause();
} else {
button.innerHTML = "Pause";
paused.value = 0;
next.removeAttribute("disabled");
+ Timer.unpause();
}
}
+/*
+ * next button
+ *
+ */
function Next()
{
// elements
- var button = document.getElementById("next");
- var target_textarea = document.getElementById("target_textarea")
+ var button = document.getElementById("next");
+ var pause_button = document.getElementById("pause_button");
+ var target_textarea = document.getElementById("target_textarea")
var raw_source_textarea = document.getElementById("raw_source_textarea");
- var current_seg_id = document.getElementById("current_seg_id");
- var source = document.getElementById("source");
- var status = document.getElementById("status");
+ var current_seg_id = document.getElementById("current_seg_id");
+ var source = document.getElementById("source");
+ var status = document.getElementById("status");
+ var oov_correct = document.getElementById("oov_correct");
// disable button and textarea
- button.setAttribute("disabled", "disabled");
+ button.setAttribute("disabled", "disabled");
+ pause_button.setAttribute("disabled", "disabled");
target_textarea.setAttribute("disabled", "disabled");
- var port = document.getElementById("port").value;
+ // get metadata stored in DOM
+ var port = document.getElementById("port").value;
var base_url = "http://coltrane.cl.uni-heidelberg.de:"+port;
+ var key = document.getElementById("key").value;
- var key = document.getElementById("key").value;
next_url = base_url+"/next?key="+key;
+
var post_edit = target_textarea.value;
- if (post_edit != "") {
+ if (oov_correct.value=="false" && post_edit != "") {
// compose request
- next_url += "&example="+source.value+" %7C%7C%7C "+post_edit;
+ next_url += "&example="+source.value+" %7C%7C%7C "+post_edit+"&duration="+Timer.get();
// update document overview
document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit;
+ } else if (oov_correct.value=="true" && post_edit != "") {
+ next_url += "&correct="+raw_source_textarea.value+" %7C%7C%7C "+post_edit
} else {
if (source.value != "") {
- alert("Error: 1");
+ alert("Error: 1"); // FIXME: do something reasonable
}
- // FIXME: do something reasonable
}
// show 'working' message
@@ -86,64 +147,98 @@ function Next()
// build request
var xhr = CreateCORSRequest('get', next_url);
if (!xhr) {
- alert("Error: 2");
- // FIXME: do something reasonable
+ alert("Error: 2"); // FIXME: do something reasonable
}
// 'next' request's callbacks
xhr.onload = function() {
- /*
- * translation system is currently handling another request
- * FIXME: maybe poll server for result?
- *
- */
+ // translation system is currently handling a request
+ // FIXME: maybe poll server for result?
if (xhr.responseText == "locked") {
alert("Translation system is locked, try again in a moment (reload page and click 'Start/Continue' again).");
- document.getElementById("status").style.display = "none";
+ status.style.display = "none";
return;
}
- // got response: seg id\tsource\ttranslation\traw source
- // 0 1 2 3
var x = xhr.responseText.split("\t");
- if (x == "fi") { // done, hide/disable functional elements
+ if (x == "fi") { // done -> hide/disable functional elements
raw_source_textarea.style.display = "none";
- target_textarea.style.display = "none";
- status.style.display = "none";
- button.innerHTML = "Session finished, thank you!";
- button.setAttribute("disabled", "disabled");
- document.getElementById("pause_button").setAttribute("disabled", "disabled");
+ target_textarea.style.display = "none";
+ status.style.display = "none";
+ button.innerHTML = "Session finished, thank you!";
+ button.setAttribute("disabled", "disabled");
+ pause_button.setAttribute("disabled", "disabled");
document.getElementById("seg_"+current_seg_id.value).className = "";
} else {
- var id = x[0];
- var src = x[1];
+ // got response: OOV\tseg id\ttoken_1\ttoken_2\t...
+ // 0 1 2 3 ...
+ if (x[0] == "OOV") {
+ var s = "";
+ for (var i=2; i < x.length; i++) {
+ s += x[i].substr(1,x[i].length-2);
+ if (i+1 < x.length) {
+ s += "; ";
+ }
+ raw_source_textarea.value = s;
+ }
+ // update interface
+ status.style.display = "none";
+ button.innerHTML = "Correct";
+ button.removeAttribute("disabled");
+ target_textarea.removeAttribute("disabled", "disabled");
+ pause_button.removeAttribute("disabled", "disabled");
+ target_textarea.value = "";
+ target_textarea.focus();
+ target_textarea.selectionStart = 0;
+ target_textarea.selectionEnd = 0;
+ oov_correct.value = true;
+ var id = x[1];
+ document.getElementById("seg_"+id).className = "bold";
+ if (id > 0) {
+ document.getElementById("seg_"+(id-1)).className = "";
+ }
+ if (document.getElementById("displayed_oov_hint").value == "false") {
+ alert("Please translate the following words (separated by semicolons) to enable translation of the next sentence. Use proper casing.");
+ document.getElementById("displayed_oov_hint").value = true;
+ }
+
+ return;
+ }
+ // got response: seg id\tsource\ttranslation\traw source
+ // 0 1 2 3
+ var id = x[0];
+ var src = x[1];
var translation = x[2];
- var raw_source = x[3];
+ var raw_source = x[3];
// update interface
- status.style.display = "none";
- target_textarea.value = translation;
+ oov_correct.value = false;
+ status.style.display = "none";
+ target_textarea.value = translation;
raw_source_textarea.value = raw_source;
- button.innerHTML = "Next";
- button.removeAttribute("disabled");
+ button.innerHTML = "Next";
+ button.removeAttribute("disabled");
target_textarea.removeAttribute("disabled", "disabled");
+ pause_button.removeAttribute("disabled", "disabled");
document.getElementById("seg_"+id).className = "bold";
if (x[0] > 0) {
document.getElementById("seg_"+(id-1)).className = "";
}
- target_textarea.rows = Math.round(translation.length/80)+1;
+ target_textarea.rows = Math.round(translation.length/80)+1;
raw_source_textarea.rows = Math.round(raw_source.length/80)+1;
target_textarea.focus();
target_textarea.selectionStart = 0;
- target_textarea.selectionEnd = 0;
+ target_textarea.selectionEnd = 0;
// remember aux data in DOM
current_seg_id.value = id;
- source.value = src;
+ source.value = src;
// confirm to server
var xhr_confirm = CreateCORSRequest('get', base_url+"/confirm");
xhr_confirm.send(); // FIXME: handle errors
+
+ Timer.start();
}
};
@@ -152,5 +247,7 @@ function Next()
};
xhr.send(); // send 'next' request
+
+ return;
}
diff --git a/server.rb b/server.rb
index 8614e0b..a655e07 100755
--- a/server.rb
+++ b/server.rb
@@ -15,6 +15,7 @@ require_relative "#{ARGV[0]}" # load configuration for this session
$lock = false # lock if currently learning/translating
$last_reply = nil # cache last reply
$confirmed = true # client received translation?
+$additional_rules = []
if !FileTest.exist? LOCK_FILE # locked?
$db = {} # FIXME: that is supposed to be a database connection
$env = {} # environment variables (socket connections to daemons)
@@ -131,6 +132,18 @@ get '/next' do # (receive post-edit, update models), send next translation
$lock = true
key = params[:key] # FIXME: do something with it, e.g. simple auth
+ if params[:correct]
+ logmsg :server, "correct: #{params[:correct]}"
+ grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ src, tgt = splitpipe(params[:correct])
+ src = src.split(';').map { |i| i.strip }
+ tgt = tgt.split(';').map { |i| i.strip }
+ src.each_with_index { |s,i|
+ rule = "[X] ||| #{s} ||| #{tgt[i]} ||| ForceRule=1 ||| 0-0"
+ $additional_rules << rule
+ }
+ $confirmed = true
+ end
# received post-edit -> update models
# 0. save raw post-edit
# 1. tokenize
@@ -147,6 +160,7 @@ get '/next' do # (receive post-edit, update models), send next translation
# 0. save raw post-edit
source, reference = params[:example].strip.split(" ||| ")
$db['post_edits_raw'] << reference.strip
+ $db['durations'] << params['duration'].to_i
# 1. tokenize
reference = send_recv :tokenizer, reference
# 2. truecase
@@ -154,6 +168,7 @@ get '/next' do # (receive post-edit, update models), send next translation
# 3. save processed post-edits
logmsg "db", "saving processed post-edit"
$db['post_edits'] << reference.strip
+ if !NOLEARN && !NOMT
# 4. update weights
grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>"
@@ -169,12 +184,13 @@ get '/next' do # (receive post-edit, update models), send next translation
send_recv :extractor, "default_context ||| #{source} ||| #{reference} ||| #{a}"
# 6. update database
logmsg "db", "updating database"
+ end
update_database
end
source = $db['source_segments'][$db['progress']]
raw_source = $db['raw_source_segments'][$db['progress']]
if !source # input is done -> displays 'Thank you!'
- logmsg "server", "end of input, sending 'fi'"
+ logmsg :server, "end of input, sending 'fi'"
$lock = false
return "fi" # return
elsif !$confirmed
@@ -183,20 +199,55 @@ get '/next' do # (receive post-edit, update models), send next translation
return $last_reply # return
else
# translate next sentence
+ # 0. no mt?
# 1. generate grammar
- # 2. translate
- # 3. detokenize
- # 4. reply
+ # 2. check for OOV
+ # 3. translate
+ # 4. detokenize
+ # 5. reply
source.strip!
+ # 0. no mt?
+ if NOMT
+ $lock = false
+ logmsg :server, "no mt"
+ return "#{$db['progress']}\t#{source}\t \t#{raw_source}" # return
+ end
# 1. generate grammar for current sentence
grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
send_recv :extractor, "default_context ||| #{source} ||| #{grammar}"
- # 2. translation
+ # - additional rules
+ $additional_rules.each { |rule|
+ logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
+ `echo "#{rule}" >> #{grammar}`
+ }
+ # 2. check for OOV
+ src_r = ReadFile.readlines(grammar).map {
+ |l| splitpipe(l)[1].strip.split
+ }.flatten.uniq
+ oovs = []
+ source.split.each { |token|
+ if !src_r.include? token
+ oovs << token
+ logmsg :server, "OOV token: '#{token}'"
+ end
+ }
+ oovs.uniq!
+ logmsg :server, "OOVs: #{oovs.to_s}"
+ if oovs.size > 0
+ $last_reply = "OOV\t#{$db['progress']}\t#{oovs.map{|i| "\"#{i}\""}.join("\t")}"
+ logmsg :server, "OOV reply: '#{$last_reply}'"
+ $lock = false
+ $confirmed = false
+ return $last_reply # return
+ end
+ # 3. translation
msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>"
transl = send_recv :dtrain, msg
- # 3. detokenizer
+ $db['mt_raw'] << transl
+ # 4. detokenizer
transl = send_recv :detokenizer, transl
- # 4. reply
+ $db['mt'] << transl
+ # 5. reply
$last_reply = "#{$db['progress']}\t#{source}\t#{transl.strip}\t#{raw_source}"
$lock = false
$confirmed = false
@@ -222,25 +273,6 @@ get '/confirm' do # client confirms received translation
return "#{$confirmed}"
end
-get '/shutdown' do # stop daemons and shut down server
- logmsg :server, "shutting down daemons"
- stop_all_daemons
-
- return "stopped all daemons, ready to shutdown"
-end
-
-get '/reset' do # reset current session
- return "locked" if $lock
- $db = JSON.parse ReadFile.read DB_FILE # FIXME: proper database
- $db['post_edits'].clear
- $db['post_edits_raw'].clear
- update_database
- $db['progress'] = 0
- $confirmed = true
-
- return "#{$db.to_s}"
-end
-
get '/set_learning_rate/:rate' do
logmsg :server, "set learning rate, #{params[:rate]}"
return "locked" if $lock
@@ -257,6 +289,18 @@ get '/set_sparse_learning_rate/:rate' do
return "done"
end
+get '/reset' do # reset current session
+ return "locked" if $lock
+ $db = JSON.parse ReadFile.read DB_FILE # FIXME: proper database
+ $db['post_edits'].clear
+ $db['post_edits_raw'].clear
+ update_database
+ $db['progress'] = 0
+ $confirmed = true
+
+ return "#{$db.to_s}"
+end
+
get '/reset_weights' do
logmsg :server, "reset weights"
return "locked" if $lock
@@ -273,15 +317,16 @@ get '/reset_extractor' do
return "done"
end
-get '/load/:name' do # load other db file than configured
- return "locked" if $lock
- $db = JSON.parse ReadFile.read "#{SESSION_DIR}/#{params[:name]}.json.original"
- $db['post_edits'].clear
- $db['post_edits_raw'].clear
- update_database
- $db['progress'] = 0
- $confirmed = true
+get '/reset_add_rules' do
+ $additional_rules.clear
+
+ return "done"
+end
- "#{$db.to_s}"
+get '/shutdown' do # stop daemons and shut down server
+ logmsg :server, "shutting down daemons"
+ stop_all_daemons
+
+ return "stopped all daemons, ready to shutdown"
end
diff --git a/views/debug.haml b/views/debug.haml
index 45cb598..bd2076f 100644
--- a/views/debug.haml
+++ b/views/debug.haml
@@ -11,6 +11,7 @@
%a{:href => "/reset", :target => "_blank"} reset progress,
%a{:href => "/reset_weights", :target => "_blank"} reset weights,
%a{:href => "/reset_extractor", :target => "_blank"} reset extractor,
+ %a{:href => "/reset_add_rules", :target => "_blank"} reset add. rules,
%a{:href => "/shutdown", :target => "_blank"} shutdown,
%span learning rate
%select