From 334c3820c673c9226513b69df93b43ac37308bd1 Mon Sep 17 00:00:00 2001
From: Patrick Simianer 
Date: Wed, 10 Jun 2015 12:31:09 +0200
Subject: stable system
---
 index.php  |  68 ++++++++++++++++------
 lfpe.css   |  74 +++++++++++++-----------
 lfpe.js    | 146 +++++++++++++++++++++++++++++++++++------------
 run_server |   2 +-
 server.rb  | 187 +++++++++++++++++++++++++++++++++++++++++++++----------------
 5 files changed, 342 insertions(+), 135 deletions(-)
diff --git a/index.php b/index.php
index 4d79798..3947b42 100644
--- a/index.php
+++ b/index.php
@@ -1,54 +1,88 @@
 
 
    
-  Post-editing application 
+  Post-editing application (key:  
   
    
 
 
-
+
 
+
 
 
+
 
+
 
+
 
+
 
-
-  Next 
-
+
+
+  
Pause 
+  
Start/Continue 
+  
Working  
+
+
+
+
+
+
Document overview 
+
+raw_source_segments as $s) {
+  if ($i <= $a->progress) {
+    echo "".($i+1).". ".$s." ".$a->post_edits_raw[$i]."  ";
+  } else {
+    echo "".($i+1).". ".$s."  ";
+  }
+  $i += 1;
+}
+?>
+
+
+
 
-
+
+
 Help  
 Press the 'Next' to submit your post-edit and to request the next segment to translate
 (or just press enter when the 'Target' textarea is in focus).
 
+
 
+
 
+
 
 
 
+
 
-
-
-  Translating   
-
-
-
-
+
+
+
+
+
+
 
 
 
diff --git a/lfpe.css b/lfpe.css
index 59d62eb..2f9b4c3 100644
--- a/lfpe.css
+++ b/lfpe.css
@@ -1,57 +1,65 @@
-textarea {
-  font-size: 20px
+html {
+  font-family: Arial, Helvetica, sans-serif;
+  font-size: 18px
 }
 
-p#translating_status {
-  display: none;
-  margin-left: 4.2em;
-  /*margin-top: -5.5em*/
+textarea {
+  font-size: 20px;
+  width: 100%
 }
 
 button {
-  margin: 1em;
-  padding: .25em;
+  margin:     1em;
+  padding:    .25em;
   background: #fff;
   font-weight: bold
 }
 
-p#fi {
-  display: none;
-}
+.bold { font-weight:bold }
 
 div#wrapper {
-  margin: 2em;
+  margin:  2em;
   padding: 1em;
-  border: 1px dashed #000
-}
-
-div#header {
-  margin-bottom: 2em;
+  border:  1px dashed #000
 }
 
-p#footer {
-  text-align:right;
-  font-size: 0.5em;
-  font-weight:bold;
-  margin:0;
-  padding:0;
-  color:#ccc
+span#status {
+  display: none;
+  float:   right
 }
 
-p#desc {
-  font-size:0.8em;
-  width:40%;
-  color: #ccc;
-  text-align:justify
-}
+/* Document overview */
+table#overview { font-size:80% }
+table#overview td.seg_text { width: 45% }
+table#overview td { border-bottom: 1px solid #000 }
+/* /Document overview */
 
-p#desc:hover {
-  color: #000
+p#footer {
+  text-align:  right;
+  font-size:   .5em;
+  font-weight: bold;
+  margin:      0;
+  padding:     0;
+  color:       #303030
 }
 
+/* Header */
+div#header { margin-bottom: 2em }
+img#uni {}
 img#cl {
   margin-bottom:20px;
   margin-left:10px;
-  vertical-align:bottom;
+  vertical-align:bottom
+}
+/* /Header */
+
+/* Help */
+p#help {
+  font-size:  .8em;
+  width:      40%;
+  color:      #ccc;
+  text-align: justify
 }
+p#help:hover { color: #000 }
+/* /Help */
 
diff --git a/lfpe.js b/lfpe.js
index c6cd97f..b10a265 100644
--- a/lfpe.js
+++ b/lfpe.js
@@ -1,20 +1,17 @@
-function CreateCORSRequest(method, url) {
+function CreateCORSRequest(method, url)
+{
   var xhr = new XMLHttpRequest();
   if ("withCredentials" in xhr) {
-    // XHR for Chrome/Firefox/Opera/Safari.
     xhr.open(method, url, true);
-  } else if (typeof XDomainRequest != "undefined") {
-    // XDomainRequest for IE.
-    xhr = new XDomainRequest();
-    xhr.open(method, url);
   } else {
-    // CORS not supported.
     xhr = null;
   }
+
   return xhr;
 }
 
-function submit(e) {
+function catch_return(e)
+{
   if (e.keyCode == 13) {
     e.preventDefault();
     Next();
@@ -23,49 +20,124 @@ function submit(e) {
   return false;
 }
 
+function pause()
+{
+  var paused = document.getElementById("paused");
+  var button = document.getElementById("pause_button");
+  var next_button = document.getElementById("next");
+  if (paused.value == 0) {
+    button.innerHTML = "Unpause";
+    paused.value = 1;
+    next.setAttribute("disabled", "disabled");
+  } else {
+    button.innerHTML = "Pause";
+    paused.value = 0;
+    next.removeAttribute("disabled");
+  }
+}
+
 function Next()
 {
-  url = "http://coltrane.cl.uni-heidelberg.de:60666/next";
-  var pe = document.getElementById("trgt").value;
-  if (pe != "") {
-    var src = document.getElementById("src_pp").value;
-    url += "?example="+src+" %7C%7C%7C "+pe;
+  // elements
+  var button = document.getElementById("next");
+  var target_textarea = document.getElementById("target_textarea")
+  var raw_source_textarea = document.getElementById("raw_source_textarea");
+  var current_seg_id = document.getElementById("current_seg_id");
+  var source = document.getElementById("source");
+  var status = document.getElementById("status");
+
+  // disable button and textarea
+  button.setAttribute("disabled", "disabled");
+  target_textarea.setAttribute("disabled", "disabled");
+
+  var base_url = "http://coltrane.cl.uni-heidelberg.de:60666"; // FIXME: variable
+
+  var key = document.getElementById("key").value;
+  next_url = base_url+"/next?key="+key;
+  var post_edit = target_textarea.value;
+  if (post_edit != "") {
+    // compose request
+    next_url += "&example="+source.value+" %7C%7C%7C "+post_edit;
+    // update document overview
+    document.getElementById("seg_"+(current_seg_id.value)+"_t").innerHTML=post_edit;
+  } else {
+    if (source.value != "") {
+      alert("Error: 1");
+    }
+    // FIXME: do something reasonable
   }
-  document.getElementById("translating_status").style.display = "block";
-  var xhr = CreateCORSRequest('get', url);
+
+  // show 'working' message
+  status.style.display = "block";
+
+  // build request
+  var xhr = CreateCORSRequest('get', next_url);
   if (!xhr) {
-    alert('CORS not supported');
-    return;
+    alert("Error: 2");
+    // FIXME: do something reasonable
   }
 
+  // 'next' request's callbacks
   xhr.onload = function() {
+    /*
+     * translation system is currently handling another request
+     * FIXME: maybe poll server for result?
+     *
+     */
+    if (xhr.responseText == "locked") {
+      alert("Translation system is locked, try again in a moment (reload page and click 'Start/Continue' again).");
+      document.getElementById("status").style.display = "none";
+
+      return;
+    }
+    // got response: seg id\tsource\ttranslation\traw source
+    //                   0   1        2           3
     var x = xhr.responseText.split("\t");
-    if (x == "fi") {
-      document.getElementById("src").style.display = "none";
-      document.getElementById("trgt").style.display = "none";
-      document.getElementById("translating_status").style.display = "none";
-      document.getElementById("next").innerHTML = "Thank you!";
-      document.getElementById("next").disabled = true;
+    if (x == "fi") { // done, hide/disable functional elements
+      raw_source_textarea.style.display = "none";
+      target_textarea.style.display = "none";
+      status.style.display = "none";
+      button.innerHTML = "Session finished, thank you!";
+      button.setAttribute("disabled", "disabled");
+      document.getElementById("pause_button").setAttribute("disabled", "disabled");
+      document.getElementById("seg_"+current_seg_id.value).className = "";
     } else {
-      document.getElementById("src_pp").value = x[0];
-      document.getElementById("src").value = x[2];
-      document.getElementById("src").rows = Math.round(x[2].length/80)+1;
-      var firstLetter = x[1][0].toUpperCase();
-      var rest = x[1].substring(1);
-      var t = firstLetter + rest;
-      document.getElementById("trgt").value = t;
-      document.getElementById("trgt").rows = Math.round(x[1].length/80)+1;
-      document.getElementById("translating_status").style.display = "none";
-      document.getElementById("trgt").focus();
-      document.getElementById("trgt").selectionStart = 0;
-      document.getElementById("trgt").selectionEnd = 0;
+      var id = x[0];
+      var src = x[1];
+      var translation = x[2];
+      var raw_source = x[3];
+
+      // update interface
+      status.style.display = "none";
+      target_textarea.value = translation;
+      raw_source_textarea.value = raw_source;
+      button.innerHTML = "Next";
+      button.removeAttribute("disabled");
+      target_textarea.removeAttribute("disabled", "disabled");
+      document.getElementById("seg_"+id).className = "bold";
+      if (x[0] > 0) {
+        document.getElementById("seg_"+(id-1)).className = "";
+      }
+      target_textarea.rows = Math.round(translation.length/80)+1;
+      raw_source_textarea.rows = Math.round(raw_source.length/80)+1;
+      target_textarea.focus();
+      target_textarea.selectionStart = 0;
+      target_textarea.selectionEnd = 0;
+
+      // remember aux data in DOM
+      current_seg_id.value = id;
+      source.value = src;
+
+      // confirm to server
+      var xhr_confirm = CreateCORSRequest('get', "http://coltrane.cl.uni-heidelberg.de:60666/confirm");
+      xhr_confirm.send(); // FIXME: handle errors
     }
   };
 
   xhr.onerror = function() {
-    alert('Error');
+    // FIXME: do something reasonable
   };
 
-  xhr.send();
+  xhr.send(); // send 'next' request
 }
 
diff --git a/run_server b/run_server
index 97bae1a..ee4a934 100755
--- a/run_server
+++ b/run_server
@@ -2,5 +2,5 @@
 
 export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
 export PYTHONPATH=~/.local/lib/python2.7/site-packages
-ruby server.rb ../example/conf.rb &>server.rb.out
+./kill;./kill;rm /tmp/server.lock; ./server.rb ../example_pattr/conf.rb
 
diff --git a/server.rb b/server.rb
index a0dcd9c..fdc99fd 100755
--- a/server.rb
+++ b/server.rb
@@ -2,14 +2,40 @@
 
 require 'sinatra'
 require 'sinatra/cross_origin'
+require "sinatra/reloader"
 require 'nanomsg'
 require 'zipf'
 require 'digest'
+require 'json'
 
+# load configuration file and setup global variables
 require_relative "#{ARGV[0]}"
-INPUT = ReadFile.readlines INPUT_FILE
-INPUT_RAW = ReadFile.readlines RAW_INPUT_FILE
-`mkdir -p #{WORK_DIR}/g`
+$lock       = false # lock if currently learning/translating
+$last_reply = nil   # cache last reply
+$confirmed = true   # client received translation?
+if !FileTest.exist? LOCK_FILE
+  $db  = {} # FIXME: that is supposed to be a database connection
+  $env = {}
+end
+
+$daemons = {
+  :detokenizer  => "/fast_scratch/simianer/lfpe/lfpe/de-tok.rb -a D -S '__ADDR__' -p #{SCRIPTS_DIR} -l #{TARGET_LANG}",
+  :tokenizer    =>  "/fast_scratch/simianer/lfpe/lfpe/de-tok.rb -a T -S '__ADDR__' -p #{SCRIPTS_DIR} -l #{TARGET_LANG}",
+  :extractor    => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
+  :aligner_fwd  => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
+  :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
+  :atools       => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
+  :dtrain       => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'"
+}
+
+# setup Sinatra
+set :bind,              SERVER_IP
+set :port,              WEB_PORT
+set :allow_origin,      :any
+set :allow_methods,     [:get, :post, :options]
+set :allow_credentials, true
+set :max_age,           "1728000"
+set :expose_headers,    ['Content-Type']
 
 def start_daemon cmd, name, addr
   STDERR.write "> starting #{name} daemon\n"
@@ -24,112 +50,179 @@ def start_daemon cmd, name, addr
   return sock, pid
 end
 
-def stop_all_daemons env
+def stop_all_daemons
   STDERR.write "shutting down all daemons\n"
-  env.each { |name,p|
+  $env.each { |name,p|
     p[:socket].send "shutdown"
     STDERR.write "< #{name} is #{p[:socket].recv}\n"
   }
 end
 
-daemons = {
-  :extractor    => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
-  :aligner_fwd  => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
-  :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
-  :atools       => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
-  :dtrain       => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" ##{DTRAIN_EXTRA}"
-}
+def update_database # advance progress by one, then write $db as JSON to DB_FILE; FIXME: real database
+  $db['progress'] += 1 # NOTE: every call advances progress, also when the caller only wants to save
+  j = JSON.generate $db
+  f = WriteFile.new DB_FILE # WriteFile is not defined in this file; presumably provided by 'zipf' -- TODO confirm
+  f.write j.to_s
+  f.close
+end
 
-env = {}
-port = BEGIN_PORT_RANGE
-daemons.each { |name,cmd|
-  sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}"
-  env[name] = { :socket => sock, :pid => pid }
-  port += 1
-}
+def init # one-time setup: load the database, create the work dir, start all daemons, create the lock file
+  # load the JSON database from DB_FILE into $db (stand-in for a real database connection)
+  $db = JSON.parse ReadFile.read DB_FILE
+  # working directory (with the 'g' subdirectory used for per-sentence grammar files)
+  `mkdir -p #{WORK_DIR}/g`
+  # start one daemon per $daemons entry on consecutive local ports; keep socket and pid in $env
+  port = BEGIN_PORT_RANGE
+  $daemons.each { |name,cmd|
+    sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}"
+    $env[name] = { :socket => sock, :pid => pid }
+    port += 1
+  }
+  `touch #{LOCK_FILE}` # marks setup as done; init is only called while LOCK_FILE does not exist
+end
 
-set :bind, SERVER_IP
-set :port, WEB_PORT
-set :allow_origin, :any
-set :allow_methods, [:get, :post, :options]
-set :allow_credentials, true
-set :max_age, "1728000"
-set :expose_headers, ['Content-Type']
+init if !FileTest.exist?(LOCK_FILE)
 
 get '/' do
   cross_origin
   "Nothing to see here."
 end
 
+# receive post-edit, send translation
 get '/next' do
   cross_origin
+  return "locked" if $lock
+  $lock = true
+  key = params[:key] # FIXME: do something with it
   if params[:example]
     source, reference = params[:example].strip.split(" ||| ")
+    # tokenize, lowercase
+    $db['post_edits_raw'] << reference.strip
+    $env[:tokenizer][:socket].send reference
+      STDERR.write "[tokenizer] waiting ...\n"
+    reference = $env[:tokenizer][:socket].recv.force_encoding("UTF-8").strip
+      STDERR.write "[tokenizer] < received tokenized reference: '#{reference}'\n"
+    reference.downcase!
+    # save post-edits
+    $db['post_edits'] << reference.strip
     # update weights
     grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar"
     annotated_source = " #{source}  "
     msg = "#{annotated_source} ||| #{reference}"
       STDERR.write "[dtrain] > sending '#{msg}' for update\n"
-    env[:dtrain][:socket].send msg
+    $env[:dtrain][:socket].send msg
       STDERR.write "[dtrain] waiting for confirmation ...\n"
-      STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n"
+      STDERR.write "[dtrain] < says it's #{$env[:dtrain][:socket].recv}\n"
     # update grammar extractor
     # get forward alignment
     msg = "#{source} ||| #{reference}"
       STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n"
-    env[:aligner_fwd][:socket].send msg
+    $env[:aligner_fwd][:socket].send msg
       STDERR.write "[aligner_fwd] waiting for alignment ...\n"
-    a_fwd = env[:aligner_fwd][:socket].recv.strip
+    a_fwd = $env[:aligner_fwd][:socket].recv.strip
       STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n"
     # get backward alignment
     msg = "#{source} ||| #{reference}"
       STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n"
-    env[:aligner_back][:socket].send msg
+    $env[:aligner_back][:socket].send msg
       STDERR.write "[aligner_back] waiting for alignment ...\n"
-    a_back = env[:aligner_back][:socket].recv.strip
+    a_back = $env[:aligner_back][:socket].recv.strip
       STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n"
-    # combine alignments
+    # symmetrize alignment
     msg = "#{a_fwd} ||| #{a_back}"
       STDERR.write "[atools] > sending '#{msg}' to combine alignments\n"
-    env[:atools][:socket].send msg
+    $env[:atools][:socket].send msg
       STDERR.write "[atools] waiting for alignment ...\n"
-    a = env[:atools][:socket].recv.strip
+    a = $env[:atools][:socket].recv.strip
       STDERR.write "[atools] < got alignment '#{a}'\n"
     # actual extractor
     msg = "TEST ||| #{source} ||| #{reference} ||| #{a}"
       STDERR.write "[extractor] > sending '#{msg}' for learning\n"
-    env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}"
+    $env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}"
       STDERR.write "[extractor] waiting for confirmation ...\n"
-      STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n"
+      STDERR.write "[extractor] < got '#{$env[:extractor][:socket].recv}'\n"
+    update_database
   end
-  source = INPUT.shift
-  raw_source = INPUT_RAW.shift
+  source     = $db['source_segments'][$db['progress']]
+  raw_source = $db['raw_source_segments'][$db['progress']]
   if !source # input is done -> displays 'Thank you!'
     STDERR.write ">>> end of input, sending 'fi'\n"
-    "fi"
+    $lock = false
+    return "fi"
+  elsif !$confirmed
+    $lock = false
+    return $last_reply
   else # translate next sentence
     source.strip!
     # generate grammar for current sentence
     grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars?
     msg = "- ||| #{source} ||| #{grammar}"                                # FIXME: content identifier useful?
       STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n"
-    env[:extractor][:socket].send msg
+    $env[:extractor][:socket].send msg
       STDERR.write "[extractor] waiting for confirmation ...\n"
-      STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n"
+      STDERR.write "[extractor] < says it generated #{$env[:extractor][:socket].recv.force_encoding("UTF-8").strip}\n"
     # translation
     msg = "act:translate |||  #{source}  "
       STDERR.write "[dtrain] > asking to translate: '#{msg}'\n"
-    env[:dtrain][:socket].send msg
+    $env[:dtrain][:socket].send msg
       STDERR.write "[dtrain] waiting for translation ...\n"
-    transl = env[:dtrain][:socket].recv.force_encoding "UTF-8"
+    transl = $env[:dtrain][:socket].recv.force_encoding "UTF-8"
       STDERR.write "[dtrain] < received translation: '#{transl}'\n"
-    "#{source}\t#{transl.strip}\t#{raw_source}"
+    # detokenizer
+    $env[:detokenizer][:socket].send transl
+      STDERR.write "[detokenizer] waiting ...\n"
+    transl = $env[:detokenizer][:socket].recv.force_encoding("UTF-8").strip
+      STDERR.write "[detokenizer] < received final translation: '#{transl}'\n"
+    # reply
+    $last_reply = "#{$db['progress']}\t#{source}\t#{transl.strip}\t#{raw_source}"
+    $lock = false
+    $confirmed = false
+    STDERR.write ">>> response: '#{$last_reply}'"
+    return $last_reply
   end
+
+  return "oh oh" # FIXME: do something sensible
+end
+
+# client confirms received translation; until then '/next' re-sends the cached $last_reply
+get '/confirm' do
+  cross_origin
+  STDERR.write "confirmed = #{$confirmed}\n" # logs the value from before this confirmation
+  $confirmed = true
+
+  return "#{$confirmed}" # always the string "true" at this point
 end
 
 # stop daemons and shut down server
 get '/shutdown' do
-  stop_all_daemons env
-  exit
+  stop_all_daemons
+
+  "ready to shutdown"
+end
+
+# reset current session
+get '/reset' do
+  return "locked" if $lock
+  $db = JSON.parse ReadFile.read DB_FILE # FIXME: database ..
+  $db['post_edits'].clear
+  $db['post_edits_raw'].clear
+  update_database
+  $db['progress'] = 0
+  $confirmed = true
+
+  return "#{$db.to_s}"
+end
+
+# load other db file than configured
+get '/load/:name' do
+  return "locked" if $lock
+  $db = JSON.parse ReadFile.read "/fast_scratch/simianer/lfpe/example_pattr/#{params[:name]}.json.original"
+  $db['post_edits'].clear
+  $db['post_edits_raw'].clear
+  update_database
+  $db['progress'] = 0
+  $confirmed = true
+
+  "#{$db.to_s}"
 end
 
-- 
cgit v1.2.3