diff options
author | Patrick Simianer <p@simianer.de> | 2015-05-13 17:46:03 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-05-13 17:46:03 +0200 |
commit | aa79a39c1c98313d73eed107623fe435c46acc42 (patch) | |
tree | 4d7bd49d2e38fdc357db6209868d3ff150de055d | |
parent | aa97f229d6a6a1e5bc2bd133d0d39eefff888566 (diff) |
full feature set
-rw-r--r-- | model/cdec.ini | 28 | ||||
-rw-r--r-- | model/dtrain.ini | 4 | ||||
-rw-r--r-- | model/grammar.gz | bin | 153 -> 0 bytes | |||
-rw-r--r-- | model/nc-wmt11.en.srilm.gz | bin | 16017291 -> 0 bytes | |||
-rwxr-xr-x | model/run | 11 | ||||
-rw-r--r-- | model/src.gz | bin | 128 -> 0 bytes | |||
-rw-r--r-- | model/weights.init | 12 | ||||
-rwxr-xr-x | run_server | 3 | ||||
-rwxr-xr-x[-rw-r--r--] | server.rb | 132 |
9 files changed, 106 insertions, 84 deletions
diff --git a/model/cdec.ini b/model/cdec.ini deleted file mode 100644 index 36f53ac..0000000 --- a/model/cdec.ini +++ /dev/null @@ -1,28 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -grammar=grammar.gz -feature_function=WordPenalty -feature_function=KLanguageModel nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -#feature_function=LexicalFeatures 1 1 1 -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures -weights=weights.init diff --git a/model/dtrain.ini b/model/dtrain.ini deleted file mode 100644 index b6d29bb..0000000 --- a/model/dtrain.ini +++ /dev/null @@ -1,4 +0,0 @@ -decoder_conf=./cdec.ini # config for cdec -k=100 # use 100best lists -N=4 # optimize (approx.) BLEU4 -margin=0.0 # perceptron's margin diff --git a/model/grammar.gz b/model/grammar.gz Binary files differdeleted file mode 100644 index 8c5f570..0000000 --- a/model/grammar.gz +++ /dev/null diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz Binary files differdeleted file mode 100644 index 7ce8105..0000000 --- a/model/nc-wmt11.en.srilm.gz +++ /dev/null diff --git a/model/run b/model/run deleted file mode 100755 index 2ab2498..0000000 --- a/model/run +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -x - -export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib - -while true; do -/fast_scratch/simianer/lfpe/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc &>dtrain.out & -PID=$! -sleep 600 -kill $PID -done - diff --git a/model/src.gz b/model/src.gz Binary files differdeleted file mode 100644 index 3f8f895..0000000 --- a/model/src.gz +++ /dev/null diff --git a/model/weights.init b/model/weights.init deleted file mode 100644 index 0d09f9f..0000000 --- a/model/weights.init +++ /dev/null @@ -1,12 +0,0 @@ -CountEF 0.1 -EgivenFCoherent -0.1 -Glue 0.01 -IsSingletonF -0.01 -IsSingletonFE -0.01 -LanguageModel 0.1 -LanguageModel_OOV -1 -MaxLexFgivenE -0.1 -MaxLexEgivenF -0.1 -PassThrough -0.1 -SampleCountF -0.1 -WordPenalty -0.1 @@ -1,5 +1,6 @@ #!/bin/bash -x export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib -ruby server.rb &>server.rb.out +export PYTHONPATH=~/.local/lib/python2.7/site-packages +ruby server2.rb ../example/conf.rb #&>server2.rb.out diff --git a/server.rb b/server.rb index def401e..922f103 100644..100755 --- a/server.rb +++ b/server.rb @@ -4,23 +4,57 @@ require 'sinatra' require 'sinatra/cross_origin' require 'nanomsg' require 'zipf' +require 'digest' -set :bind, '147.142.207.52' -set :port, 60666 +require_relative "#{ARGV[0]}" +INPUT = ReadFile.readlines INPUT_FILE +`mkdir -p #{WORK_DIR}/g` +def start_daemon cmd, name, addr + STDERR.write "> starting #{name} daemon\n" + cmd.gsub! '__ADDR__', addr + pid = fork do + exec cmd + end + sock = NanoMsg::PairSocket.new + sock.connect addr + STDERR.write ">> got #{sock.recv} from #{name}\n" + + return sock, pid +end + +def stop_all_daemons env + STDERR.write "shutting down all daemons\n" + env.each { |name,p| + p[:socket].send "shutdown" + STDERR.write ">> #{name} is #{p[:socket].recv}" + } +end + +daemons = { + :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'", + :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params --sock_url '__ADDR__'", + :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params --sock_url '__ADDR__'", + :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'", + :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" +} + +env = {} +port = BEGIN_PORT_RANGE +daemons.each { |name,cmd| + sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}" + env[name] = { :socket => sock, :pid => pid } + port += 1 +} + +set :bind, SERVER_IP +set :port, WEB_PORT set :allow_origin, :any set :allow_methods, [:get, :post, :options] set :allow_credentials, true set :max_age, "1728000" set :expose_headers, ['Content-Type'] -sock = NanoMsg::PairSocket.new -addr = "ipc:///tmp/dtrain.ipc" -sock.bind addr - -input = ReadFile.readlines_strip "model/src.gz" -input_ = Array.new input - get '/' do cross_origin "Nothing to see here." @@ -29,29 +63,71 @@ end get '/next' do cross_origin if params[:example] - sock.send params[:example].strip - puts params.to_s - sock.recv # dummy + source, reference = params[:example].strip.split(" ||| ") + # update weights + grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" + annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>" + msg = "#{annotated_source} ||| #{reference}" + STDERR.write "[dtrain] > sending '#{msg}' for update\n" + env[:dtrain][:socket].send msg + STDERR.write "[dtrain] waiting for confirmation ...\n" + STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n" + # update grammar extractor + # get forward alignment + msg = "#{source} ||| #{reference}" + STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n" + env[:aligner_fwd][:socket].send msg + STDERR.write "[aligner_fwd] waiting for alignment ...\n" + a_fwd = env[:aligner_fwd][:socket].recv.strip + STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n" + # get backward alignment + msg = "#{source} ||| #{reference}" + STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n" + env[:aligner_back][:socket].send msg + STDERR.write "[aligner_back] waiting for alignment ...\n" + a_back = env[:aligner_back][:socket].recv.strip + STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n" + # combine alignments + msg = "#{a_fwd} ||| #{a_back}" + STDERR.write "[atools] > sending '#{msg}' to combine alignments\n" + env[:atools][:socket].send msg + STDERR.write "[atools] waiting for alignment ...\n" + a = env[:atools][:socket].recv.strip + STDERR.write "[atools] < got alignment '#{a}'\n" + # actual extractor + msg = "TEST ||| #{source} ||| #{reference} ||| #{a}" + STDERR.write "[extractor] > sending '#{msg}' for learning\n" + env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}" + STDERR.write "[extractor] waiting for confirmation ...\n" + STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n" end - src = input.shift - if !src - puts "end of input, sending 'fi'" + source = INPUT.shift + if !source # input is done -> displays 'Thank you!' + STDERR.write ">>> end of input, sending 'fi'\n" "fi" - else - puts "sending source '#{src}' ..." - sock.send "act:translate ||| #{src}" - puts "done" - sleep 1 - puts "waiting for translation ..." - t = sock.recv - puts "got translation '#{t}'" - "#{src}\t#{t}" + else # translate next sentence + source.strip! + # generate grammar for current sentence + grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars? + msg = "- ||| #{source} ||| #{grammar}" # FIXME: content identifier useful? + STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n" + env[:extractor][:socket].send msg + STDERR.write "[extractor] waiting for confirmation ...\n" + STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n" + # translation + msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>" + STDERR.write "[dtrain] > asking to translate: '#{msg}'\n" + env[:dtrain][:socket].send msg + STDERR.write "[dtrain] waiting for translation ...\n" + transl = env[:dtrain][:socket].recv.encode "UTF-8" + STDERR.write "[dtrain] < received translation: '#{transl}'\n" + "#{source}\t#{transl}" end end -get '/reset' do - cross_origin - input = Array.new input_ - "done" +# stop daemons and shut down server +get '/shutdown' do + stop_all_daemons env + exit end |