From aa79a39c1c98313d73eed107623fe435c46acc42 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 13 May 2015 17:46:03 +0200 Subject: full feature set --- model/cdec.ini | 28 ---------- model/dtrain.ini | 4 -- model/grammar.gz | Bin 153 -> 0 bytes model/nc-wmt11.en.srilm.gz | Bin 16017291 -> 0 bytes model/run | 11 ---- model/src.gz | Bin 128 -> 0 bytes model/weights.init | 12 ----- run_server | 3 +- server.rb | 132 +++++++++++++++++++++++++++++++++++---------- 9 files changed, 106 insertions(+), 84 deletions(-) delete mode 100644 model/cdec.ini delete mode 100644 model/dtrain.ini delete mode 100644 model/grammar.gz delete mode 100644 model/nc-wmt11.en.srilm.gz delete mode 100755 model/run delete mode 100644 model/src.gz delete mode 100644 model/weights.init mode change 100644 => 100755 server.rb diff --git a/model/cdec.ini b/model/cdec.ini deleted file mode 100644 index 36f53ac..0000000 --- a/model/cdec.ini +++ /dev/null @@ -1,28 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -grammar=grammar.gz -feature_function=WordPenalty -feature_function=KLanguageModel nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -#feature_function=LexicalFeatures 1 1 1 -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures -weights=weights.init diff --git a/model/dtrain.ini b/model/dtrain.ini deleted file mode 100644 index b6d29bb..0000000 --- a/model/dtrain.ini +++ /dev/null @@ -1,4 +0,0 @@ -decoder_conf=./cdec.ini # config for cdec -k=100 # use 100best lists -N=4 # optimize (approx.) BLEU4 -margin=0.0 # perceptron's margin diff --git a/model/grammar.gz b/model/grammar.gz deleted file mode 100644 index 8c5f570..0000000 Binary files a/model/grammar.gz and /dev/null differ diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz deleted file mode 100644 index 7ce8105..0000000 Binary files a/model/nc-wmt11.en.srilm.gz and /dev/null differ diff --git a/model/run b/model/run deleted file mode 100755 index 2ab2498..0000000 --- a/model/run +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -x - -export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib - -while true; do -/fast_scratch/simianer/lfpe/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc &>dtrain.out & -PID=$! -sleep 600 -kill $PID -done - diff --git a/model/src.gz b/model/src.gz deleted file mode 100644 index 3f8f895..0000000 Binary files a/model/src.gz and /dev/null differ diff --git a/model/weights.init b/model/weights.init deleted file mode 100644 index 0d09f9f..0000000 --- a/model/weights.init +++ /dev/null @@ -1,12 +0,0 @@ -CountEF 0.1 -EgivenFCoherent -0.1 -Glue 0.01 -IsSingletonF -0.01 -IsSingletonFE -0.01 -LanguageModel 0.1 -LanguageModel_OOV -1 -MaxLexFgivenE -0.1 -MaxLexEgivenF -0.1 -PassThrough -0.1 -SampleCountF -0.1 -WordPenalty -0.1 diff --git a/run_server b/run_server index 5747499..39c92ae 100755 --- a/run_server +++ b/run_server @@ -1,5 +1,6 @@ #!/bin/bash -x export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib -ruby server.rb &>server.rb.out +export PYTHONPATH=~/.local/lib/python2.7/site-packages +ruby server2.rb ../example/conf.rb #&>server2.rb.out diff --git a/server.rb b/server.rb old mode 100644 new mode 100755 index def401e..922f103 --- a/server.rb +++ b/server.rb @@ -4,23 +4,57 @@ require 'sinatra' require 'sinatra/cross_origin' require 'nanomsg' require 'zipf' +require 'digest' -set :bind, '147.142.207.52' -set :port, 60666 +require_relative "#{ARGV[0]}" +INPUT = ReadFile.readlines INPUT_FILE +`mkdir -p #{WORK_DIR}/g` +def start_daemon cmd, name, addr + STDERR.write "> starting #{name} daemon\n" + cmd.gsub! '__ADDR__', addr + pid = fork do + exec cmd + end + sock = NanoMsg::PairSocket.new + sock.connect addr + STDERR.write ">> got #{sock.recv} from #{name}\n" + + return sock, pid +end + +def stop_all_daemons env + STDERR.write "shutting down all daemons\n" + env.each { |name,p| + p[:socket].send "shutdown" + STDERR.write ">> #{name} is #{p[:socket].recv}" + } +end + +daemons = { + :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'", + :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params --sock_url '__ADDR__'", + :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params --sock_url '__ADDR__'", + :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'", + :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" +} + +env = {} +port = BEGIN_PORT_RANGE +daemons.each { |name,cmd| + sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}" + env[name] = { :socket => sock, :pid => pid } + port += 1 +} + +set :bind, SERVER_IP +set :port, WEB_PORT set :allow_origin, :any set :allow_methods, [:get, :post, :options] set :allow_credentials, true set :max_age, "1728000" set :expose_headers, ['Content-Type'] -sock = NanoMsg::PairSocket.new -addr = "ipc:///tmp/dtrain.ipc" -sock.bind addr - -input = ReadFile.readlines_strip "model/src.gz" -input_ = Array.new input - get '/' do cross_origin "Nothing to see here." @@ -29,29 +63,71 @@ end get '/next' do cross_origin if params[:example] - sock.send params[:example].strip - puts params.to_s - sock.recv # dummy + source, reference = params[:example].strip.split(" ||| ") + # update weights + grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" + annotated_source = " #{source} " + msg = "#{annotated_source} ||| #{reference}" + STDERR.write "[dtrain] > sending '#{msg}' for update\n" + env[:dtrain][:socket].send msg + STDERR.write "[dtrain] waiting for confirmation ...\n" + STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n" + # update grammar extractor + # get forward alignment + msg = "#{source} ||| #{reference}" + STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n" + env[:aligner_fwd][:socket].send msg + STDERR.write "[aligner_fwd] waiting for alignment ...\n" + a_fwd = env[:aligner_fwd][:socket].recv.strip + STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n" + # get backward alignment + msg = "#{source} ||| #{reference}" + STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n" + env[:aligner_back][:socket].send msg + STDERR.write "[aligner_back] waiting for alignment ...\n" + a_back = env[:aligner_back][:socket].recv.strip + STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n" + # combine alignments + msg = "#{a_fwd} ||| #{a_back}" + STDERR.write "[atools] > sending '#{msg}' to combine alignments\n" + env[:atools][:socket].send msg + STDERR.write "[atools] waiting for alignment ...\n" + a = env[:atools][:socket].recv.strip + STDERR.write "[atools] < got alignment '#{a}'\n" + # actual extractor + msg = "TEST ||| #{source} ||| #{reference} ||| #{a}" + STDERR.write "[extractor] > sending '#{msg}' for learning\n" + env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}" + STDERR.write "[extractor] waiting for confirmation ...\n" + STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n" end - src = input.shift - if !src - puts "end of input, sending 'fi'" + source = INPUT.shift + if !source # input is done -> displays 'Thank you!' + STDERR.write ">>> end of input, sending 'fi'\n" "fi" - else - puts "sending source '#{src}' ..." - sock.send "act:translate ||| #{src}" - puts "done" - sleep 1 - puts "waiting for translation ..." - t = sock.recv - puts "got translation '#{t}'" - "#{src}\t#{t}" + else # translate next sentence + source.strip! + # generate grammar for current sentence + grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars? + msg = "- ||| #{source} ||| #{grammar}" # FIXME: content identifier useful? + STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n" + env[:extractor][:socket].send msg + STDERR.write "[extractor] waiting for confirmation ...\n" + STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n" + # translation + msg = "act:translate ||| #{source} " + STDERR.write "[dtrain] > asking to translate: '#{msg}'\n" + env[:dtrain][:socket].send msg + STDERR.write "[dtrain] waiting for translation ...\n" + transl = env[:dtrain][:socket].recv.encode "UTF-8" + STDERR.write "[dtrain] < received translation: '#{transl}'\n" + "#{source}\t#{transl}" end end -get '/reset' do - cross_origin - input = Array.new input_ - "done" +# stop daemons and shut down server +get '/shutdown' do + stop_all_daemons env + exit end -- cgit v1.2.3