summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2015-05-13 17:46:03 +0200
committerPatrick Simianer <p@simianer.de>2015-05-13 17:46:03 +0200
commitaa79a39c1c98313d73eed107623fe435c46acc42 (patch)
tree4d7bd49d2e38fdc357db6209868d3ff150de055d
parentaa97f229d6a6a1e5bc2bd133d0d39eefff888566 (diff)
full feature set
-rw-r--r--model/cdec.ini28
-rw-r--r--model/dtrain.ini4
-rw-r--r--model/grammar.gzbin153 -> 0 bytes
-rw-r--r--model/nc-wmt11.en.srilm.gzbin16017291 -> 0 bytes
-rwxr-xr-xmodel/run11
-rw-r--r--model/src.gzbin128 -> 0 bytes
-rw-r--r--model/weights.init12
-rwxr-xr-xrun_server3
-rwxr-xr-x[-rw-r--r--]server.rb132
9 files changed, 106 insertions, 84 deletions
diff --git a/model/cdec.ini b/model/cdec.ini
deleted file mode 100644
index 36f53ac..0000000
--- a/model/cdec.ini
+++ /dev/null
@@ -1,28 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-grammar=grammar.gz
-feature_function=WordPenalty
-feature_function=KLanguageModel nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleSourceBigramFeatures
-feature_function=RuleTargetBigramFeatures
-feature_function=RuleShape
-#feature_function=LexicalFeatures 1 1 1
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
-weights=weights.init
diff --git a/model/dtrain.ini b/model/dtrain.ini
deleted file mode 100644
index b6d29bb..0000000
--- a/model/dtrain.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-decoder_conf=./cdec.ini # config for cdec
-k=100 # use 100best lists
-N=4 # optimize (approx.) BLEU4
-margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
deleted file mode 100644
index 8c5f570..0000000
--- a/model/grammar.gz
+++ /dev/null
Binary files differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
deleted file mode 100644
index 7ce8105..0000000
--- a/model/nc-wmt11.en.srilm.gz
+++ /dev/null
Binary files differ
diff --git a/model/run b/model/run
deleted file mode 100755
index 2ab2498..0000000
--- a/model/run
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash -x
-
-export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
-
-while true; do
-/fast_scratch/simianer/lfpe/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc &>dtrain.out &
-PID=$!
-sleep 600
-kill $PID
-done
-
diff --git a/model/src.gz b/model/src.gz
deleted file mode 100644
index 3f8f895..0000000
--- a/model/src.gz
+++ /dev/null
Binary files differ
diff --git a/model/weights.init b/model/weights.init
deleted file mode 100644
index 0d09f9f..0000000
--- a/model/weights.init
+++ /dev/null
@@ -1,12 +0,0 @@
-CountEF 0.1
-EgivenFCoherent -0.1
-Glue 0.01
-IsSingletonF -0.01
-IsSingletonFE -0.01
-LanguageModel 0.1
-LanguageModel_OOV -1
-MaxLexFgivenE -0.1
-MaxLexEgivenF -0.1
-PassThrough -0.1
-SampleCountF -0.1
-WordPenalty -0.1
diff --git a/run_server b/run_server
index 5747499..39c92ae 100755
--- a/run_server
+++ b/run_server
@@ -1,5 +1,6 @@
#!/bin/bash -x
export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
-ruby server.rb &>server.rb.out
+export PYTHONPATH=~/.local/lib/python2.7/site-packages
+ruby server2.rb ../example/conf.rb #&>server2.rb.out
diff --git a/server.rb b/server.rb
index def401e..922f103 100644..100755
--- a/server.rb
+++ b/server.rb
@@ -4,23 +4,57 @@ require 'sinatra'
require 'sinatra/cross_origin'
require 'nanomsg'
require 'zipf'
+require 'digest'
-set :bind, '147.142.207.52'
-set :port, 60666
+require_relative "#{ARGV[0]}"
+INPUT = ReadFile.readlines INPUT_FILE
+`mkdir -p #{WORK_DIR}/g`
+def start_daemon cmd, name, addr
+ STDERR.write "> starting #{name} daemon\n"
+ cmd.gsub! '__ADDR__', addr
+ pid = fork do
+ exec cmd
+ end
+ sock = NanoMsg::PairSocket.new
+ sock.connect addr
+ STDERR.write ">> got #{sock.recv} from #{name}\n"
+
+ return sock, pid
+end
+
+def stop_all_daemons env
+ STDERR.write "shutting down all daemons\n"
+ env.each { |name,p|
+ p[:socket].send "shutdown"
+ STDERR.write ">> #{name} is #{p[:socket].recv}"
+ }
+end
+
+daemons = {
+ :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
+ :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params --sock_url '__ADDR__'",
+ :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params --sock_url '__ADDR__'",
+ :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
+ :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'"
+}
+
+env = {}
+port = BEGIN_PORT_RANGE
+daemons.each { |name,cmd|
+ sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}"
+ env[name] = { :socket => sock, :pid => pid }
+ port += 1
+}
+
+set :bind, SERVER_IP
+set :port, WEB_PORT
set :allow_origin, :any
set :allow_methods, [:get, :post, :options]
set :allow_credentials, true
set :max_age, "1728000"
set :expose_headers, ['Content-Type']
-sock = NanoMsg::PairSocket.new
-addr = "ipc:///tmp/dtrain.ipc"
-sock.bind addr
-
-input = ReadFile.readlines_strip "model/src.gz"
-input_ = Array.new input
-
get '/' do
cross_origin
"Nothing to see here."
@@ -29,29 +63,71 @@ end
get '/next' do
cross_origin
if params[:example]
- sock.send params[:example].strip
- puts params.to_s
- sock.recv # dummy
+ source, reference = params[:example].strip.split(" ||| ")
+ # update weights
+ grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar"
+ annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>"
+ msg = "#{annotated_source} ||| #{reference}"
+ STDERR.write "[dtrain] > sending '#{msg}' for update\n"
+ env[:dtrain][:socket].send msg
+ STDERR.write "[dtrain] waiting for confirmation ...\n"
+ STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n"
+ # update grammar extractor
+ # get forward alignment
+ msg = "#{source} ||| #{reference}"
+ STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n"
+ env[:aligner_fwd][:socket].send msg
+ STDERR.write "[aligner_fwd] waiting for alignment ...\n"
+ a_fwd = env[:aligner_fwd][:socket].recv.strip
+ STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n"
+ # get backward alignment
+ msg = "#{source} ||| #{reference}"
+ STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n"
+ env[:aligner_back][:socket].send msg
+ STDERR.write "[aligner_back] waiting for alignment ...\n"
+ a_back = env[:aligner_back][:socket].recv.strip
+ STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n"
+ # combine alignments
+ msg = "#{a_fwd} ||| #{a_back}"
+ STDERR.write "[atools] > sending '#{msg}' to combine alignments\n"
+ env[:atools][:socket].send msg
+ STDERR.write "[atools] waiting for alignment ...\n"
+ a = env[:atools][:socket].recv.strip
+ STDERR.write "[atools] < got alignment '#{a}'\n"
+ # actual extractor
+ msg = "TEST ||| #{source} ||| #{reference} ||| #{a}"
+ STDERR.write "[extractor] > sending '#{msg}' for learning\n"
+ env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}"
+ STDERR.write "[extractor] waiting for confirmation ...\n"
+ STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n"
end
- src = input.shift
- if !src
- puts "end of input, sending 'fi'"
+ source = INPUT.shift
+ if !source # input is done -> displays 'Thank you!'
+ STDERR.write ">>> end of input, sending 'fi'\n"
"fi"
- else
- puts "sending source '#{src}' ..."
- sock.send "act:translate ||| #{src}"
- puts "done"
- sleep 1
- puts "waiting for translation ..."
- t = sock.recv
- puts "got translation '#{t}'"
- "#{src}\t#{t}"
+ else # translate next sentence
+ source.strip!
+ # generate grammar for current sentence
+ grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars?
+ msg = "- ||| #{source} ||| #{grammar}" # FIXME: content identifier useful?
+ STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n"
+ env[:extractor][:socket].send msg
+ STDERR.write "[extractor] waiting for confirmation ...\n"
+ STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n"
+ # translation
+ msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>"
+ STDERR.write "[dtrain] > asking to translate: '#{msg}'\n"
+ env[:dtrain][:socket].send msg
+ STDERR.write "[dtrain] waiting for translation ...\n"
+ transl = env[:dtrain][:socket].recv.encode "UTF-8"
+ STDERR.write "[dtrain] < received translation: '#{transl}'\n"
+ "#{source}\t#{transl}"
end
end
-get '/reset' do
- cross_origin
- input = Array.new input_
- "done"
+# stop daemons and shut down server
+get '/shutdown' do
+ stop_all_daemons env
+ exit
end