summary refs log tree commit diff
path: root/server.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-06-10 12:31:09 +0200
committer Patrick Simianer <p@simianer.de> 2015-06-10 12:31:09 +0200
commit 334c3820c673c9226513b69df93b43ac37308bd1 (patch)
tree f76cf42d270280a01288ee30d6c8a26dd4c485c0 /server.rb
parent 30d056ba5cb3b9262b193407adfbbd288c63dc3e (diff)
stable system
Diffstat (limited to 'server.rb')
-rwxr-xr-x server.rb 187
1 files changed, 140 insertions, 47 deletions
diff --git a/server.rb b/server.rb
index a0dcd9c..fdc99fd 100755
--- a/server.rb
+++ b/server.rb
@@ -2,14 +2,40 @@
require 'sinatra'
require 'sinatra/cross_origin'
+require "sinatra/reloader"
require 'nanomsg'
require 'zipf'
require 'digest'
+require 'json'
+# load configuration file and setup global variables
require_relative "#{ARGV[0]}"
-INPUT = ReadFile.readlines INPUT_FILE
-INPUT_RAW = ReadFile.readlines RAW_INPUT_FILE
-`mkdir -p #{WORK_DIR}/g`
+$lock = false # true while a /next request is learning/translating; /next, /reset and /load answer "locked" meanwhile
+$last_reply = nil # cache of the last /next reply; /next re-sends it while $confirmed is false
+$confirmed = true # has the client acknowledged (via /confirm) the last translation?
+if !FileTest.exist? LOCK_FILE # only when no lock file is present; init creates that file after starting the daemons
+ $db = {} # FIXME: that is supposed to be a database connection
+ $env = {} # daemon name => { :socket, :pid }, filled by init
+end # NOTE(review): if LOCK_FILE exists, $db/$env are not assigned here and init is skipped as well -- presumably relies on state surviving a sinatra/reloader reload; confirm
+
+$daemons = { # daemon name => shell command line; init starts one daemon per entry
+ :detokenizer => "/fast_scratch/simianer/lfpe/lfpe/de-tok.rb -a D -S '__ADDR__' -p #{SCRIPTS_DIR} -l #{TARGET_LANG}", # NOTE(review): absolute, user-specific script path, unlike the CDEC_NET-based commands below
+ :tokenizer => "/fast_scratch/simianer/lfpe/lfpe/de-tok.rb -a T -S '__ADDR__' -p #{SCRIPTS_DIR} -l #{TARGET_LANG}", # same script; -a T vs. -a D presumably selects tokenize vs. detokenize -- confirm
+ :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
+ :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
+ :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
+ :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
+ :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'"
+} # '__ADDR__' is presumably replaced with the daemon's socket address by start_daemon -- its body is not fully visible here; confirm
+
+# setup Sinatra
+set :bind, SERVER_IP # SERVER_IP/WEB_PORT are presumably defined by the configuration file loaded via ARGV[0] -- confirm
+set :port, WEB_PORT
+set :allow_origin, :any # cross-origin settings for the cross_origin calls in the routes (sinatra/cross_origin)
+set :allow_methods, [:get, :post, :options]
+set :allow_credentials, true # NOTE(review): combined with :allow_origin :any -- confirm this is intended
+set :max_age, "1728000" # 1728000 seconds = 20 days
+set :expose_headers, ['Content-Type']
def start_daemon cmd, name, addr
STDERR.write "> starting #{name} daemon\n"
@@ -24,112 +50,179 @@ def start_daemon cmd, name, addr
return sock, pid
end
-def stop_all_daemons env
+def stop_all_daemons # send "shutdown" to every daemon registered in $env and log each answer to STDERR
STDERR.write "shutting down all daemons\n"
- env.each { |name,p|
+ $env.each { |name,p| # p is the { :socket, :pid } hash stored by init
p[:socket].send "shutdown"
STDERR.write "< #{name} is #{p[:socket].recv}\n" # logs the daemon's answer; NOTE(review): p[:pid] is not used, the processes are not waited for here
}
end
-daemons = {
- :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
- :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
- :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
- :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
- :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" ##{DTRAIN_EXTRA}"
-}
+def update_database # advance progress by one and dump $db as JSON to DB_FILE -- FIXME: real database
+ $db['progress'] += 1 # side effect on every call, including the calls from /reset and /load
+ j = JSON.generate $db
+ f = WriteFile.new DB_FILE # WriteFile is not defined in the visible code; presumably provided by the zipf gem -- confirm
+ f.write j.to_s # JSON.generate already returns a String, so to_s changes nothing
+ f.close # NOTE(review): not reached if write raises, the handle would stay open
+end
-env = {}
-port = BEGIN_PORT_RANGE
-daemons.each { |name,cmd|
- sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}"
- env[name] = { :socket => sock, :pid => pid }
- port += 1
-}
+def init # one-time setup: load the database, create the working directory, start all daemons, create LOCK_FILE
+ # database connection
+ $db = JSON.parse ReadFile.read DB_FILE # for now the "database" is the parsed JSON content of DB_FILE
+ # working directory
+ `mkdir -p #{WORK_DIR}/g` # g/ is where /next places the per-sentence grammar files
+ # setup environment, start daemons
+ port = BEGIN_PORT_RANGE
+ $daemons.each { |name,cmd| # one local TCP address per daemon, on consecutive ports
+ sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}"
+ $env[name] = { :socket => sock, :pid => pid }
+ port += 1
+ }
+ `touch #{LOCK_FILE}` # marks the system as set up; the top-level code only calls init while this file does not exist
+end
-set :bind, SERVER_IP
-set :port, WEB_PORT
-set :allow_origin, :any
-set :allow_methods, [:get, :post, :options]
-set :allow_credentials, true
-set :max_age, "1728000"
-set :expose_headers, ['Content-Type']
+init if !FileTest.exist?(LOCK_FILE) # run the setup only when no lock file exists yet (init creates it at the end)
get '/' do # root route: placeholder answer only
cross_origin # sinatra/cross_origin helper; applies the :allow_* settings to this response
"Nothing to see here."
end
+# receive post-edit, send translation
get '/next' do # optionally learn from the post-edit in params[:example], then translate and return the next source segment
cross_origin
+ return "locked" if $lock # refuse while another request is learning/translating; NOTE(review): test and set are separate steps -- only safe if requests are handled one at a time; confirm
+ $lock = true # NOTE(review): released only on the explicit return paths below; an exception in between leaves it set
+ key = params[:key] # FIXME: do something with it
if params[:example] # a post-edit was submitted as "source ||| reference"
source, reference = params[:example].strip.split(" ||| ")
+ # tokenize, lowercase
+ $db['post_edits_raw'] << reference.strip # NOTE(review): reference is nil when the example contains no " ||| ", so this line raises NoMethodError
+ $env[:tokenizer][:socket].send reference
+ STDERR.write "[tokenizer] waiting ...\n"
+ reference = $env[:tokenizer][:socket].recv.force_encoding("UTF-8").strip
+ STDERR.write "[tokenizer] < received tokenized reference: '#{reference}'\n"
+ reference.downcase!
+ # save post-edits
+ $db['post_edits'] << reference.strip
# update weights
grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # grammar file is named after the SHA256 hex digest of the source
annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>"
msg = "#{annotated_source} ||| #{reference}"
STDERR.write "[dtrain] > sending '#{msg}' for update\n"
- env[:dtrain][:socket].send msg
+ $env[:dtrain][:socket].send msg
STDERR.write "[dtrain] waiting for confirmation ...\n"
- STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n"
+ STDERR.write "[dtrain] < says it's #{$env[:dtrain][:socket].recv}\n"
# update grammar extractor
# get forward alignment
msg = "#{source} ||| #{reference}"
STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n"
- env[:aligner_fwd][:socket].send msg
+ $env[:aligner_fwd][:socket].send msg
STDERR.write "[aligner_fwd] waiting for alignment ...\n"
- a_fwd = env[:aligner_fwd][:socket].recv.strip
+ a_fwd = $env[:aligner_fwd][:socket].recv.strip
STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n"
# get backward alignment
msg = "#{source} ||| #{reference}" # same message as for the forward aligner
STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n"
- env[:aligner_back][:socket].send msg
+ $env[:aligner_back][:socket].send msg
STDERR.write "[aligner_back] waiting for alignment ...\n"
- a_back = env[:aligner_back][:socket].recv.strip
+ a_back = $env[:aligner_back][:socket].recv.strip
STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n"
- # combine alignments
+ # symmetrize alignment
msg = "#{a_fwd} ||| #{a_back}"
STDERR.write "[atools] > sending '#{msg}' to combine alignments\n"
- env[:atools][:socket].send msg
+ $env[:atools][:socket].send msg
STDERR.write "[atools] waiting for alignment ...\n"
- a = env[:atools][:socket].recv.strip
+ a = $env[:atools][:socket].recv.strip
STDERR.write "[atools] < got alignment '#{a}'\n"
# actual extractor
msg = "TEST ||| #{source} ||| #{reference} ||| #{a}"
STDERR.write "[extractor] > sending '#{msg}' for learning\n"
- env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}"
+ $env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}" # same string as msg built above
STDERR.write "[extractor] waiting for confirmation ...\n"
- STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n"
+ STDERR.write "[extractor] < got '#{$env[:extractor][:socket].recv}'\n"
+ update_database # increments $db['progress'] and writes $db to DB_FILE
end
- source = INPUT.shift
- raw_source = INPUT_RAW.shift
+ source = $db['source_segments'][$db['progress']] # nil once progress points past the last segment
+ raw_source = $db['raw_source_segments'][$db['progress']]
if !source # input is done -> displays 'Thank you!'
STDERR.write ">>> end of input, sending 'fi'\n"
- "fi"
+ $lock = false
+ return "fi"
+ elsif !$confirmed # last reply was not acknowledged via /confirm: send it again instead of translating; NOTE(review): the learning step above has already run and advanced progress by then -- confirm
+ $lock = false
+ return $last_reply
else # translate next sentence
source.strip! # in-place: also changes the string stored in $db['source_segments']
# generate grammar for current sentence
grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars?
msg = "- ||| #{source} ||| #{grammar}" # FIXME: content identifier useful?
STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n"
- env[:extractor][:socket].send msg
+ $env[:extractor][:socket].send msg
STDERR.write "[extractor] waiting for confirmation ...\n"
- STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n"
+ STDERR.write "[extractor] < says it generated #{$env[:extractor][:socket].recv.force_encoding("UTF-8").strip}\n"
# translation
msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>"
STDERR.write "[dtrain] > asking to translate: '#{msg}'\n"
- env[:dtrain][:socket].send msg
+ $env[:dtrain][:socket].send msg
STDERR.write "[dtrain] waiting for translation ...\n"
- transl = env[:dtrain][:socket].recv.force_encoding "UTF-8"
+ transl = $env[:dtrain][:socket].recv.force_encoding "UTF-8"
STDERR.write "[dtrain] < received translation: '#{transl}'\n"
- "#{source}\t#{transl.strip}\t#{raw_source}"
+ # detokenizer
+ $env[:detokenizer][:socket].send transl
+ STDERR.write "[detokenizer] waiting ...\n"
+ transl = $env[:detokenizer][:socket].recv.force_encoding("UTF-8").strip
+ STDERR.write "[detokenizer] < received final translation: '#{transl}'\n"
+ # reply
+ $last_reply = "#{$db['progress']}\t#{source}\t#{transl.strip}\t#{raw_source}" # tab-separated: progress index, source, translation, raw source
+ $lock = false
+ $confirmed = false # until the client calls /confirm, /next keeps answering with $last_reply
+ STDERR.write ">>> response: '#{$last_reply}'" # NOTE(review): no trailing "\n", unlike the other log lines
+ return $last_reply
end
+
+ return "oh oh" # FIXME: do something sensible (unreachable: every branch above returns)
+end
+
+# client confirms received translation
+get '/confirm' do # client acknowledges the last /next reply, so /next stops re-sending $last_reply
+ cross_origin
+ STDERR.write "confirmed = #{$confirmed}\n" # logs the value from before this request
+ $confirmed = true
+
+ return "#{$confirmed}" # always the string "true"
end
# stop daemons and shut down server
get '/shutdown' do # asks all daemons to shut down and answers with a status string
- stop_all_daemons env
- exit
+ stop_all_daemons
+
+ "ready to shutdown" # NOTE(review): unlike the removed code this no longer calls exit, and nothing visible here removes LOCK_FILE -- confirm
+end
+
+# reset current session
+get '/reset' do # reload the database from DB_FILE, drop all stored post-edits and start again at segment 0
+ return "locked" if $lock # NOTE(review): no cross_origin call in this route, unlike /next and /confirm -- confirm
+ $db = JSON.parse ReadFile.read DB_FILE # FIXME: database ..
+ $db['post_edits'].clear
+ $db['post_edits_raw'].clear
+ update_database # writes the cleared state to DB_FILE, but increments progress before writing
+ $db['progress'] = 0 # NOTE(review): set after the write, so only the in-memory value is 0; the file keeps the incremented progress -- confirm intent
+ $confirmed = true # so that the next /next call translates instead of re-sending $last_reply
+
+ return "#{$db.to_s}" # Hash#to_s output (inspect format), not JSON
+end
+
+# load other db file than configured
+get '/load/:name' do # replace the current database with <name>.json.original from the example directory and start at segment 0
+ return "locked" if $lock
+ $db = JSON.parse ReadFile.read "/fast_scratch/simianer/lfpe/example_pattr/#{params[:name]}.json.original" # NOTE(review): params[:name] goes into the path unchecked -- confirm it cannot point outside this directory
+ $db['post_edits'].clear
+ $db['post_edits_raw'].clear
+ update_database # writes the loaded data to DB_FILE (post-edits cleared, progress incremented by one)
+ $db['progress'] = 0 # NOTE(review): set after the write, so the 0 is only in memory -- confirm intent
+ $confirmed = true # so that the next /next call translates instead of re-sending $last_reply
+
+ "#{$db.to_s}" # Hash#to_s output (inspect format), not JSON
end