From aa79a39c1c98313d73eed107623fe435c46acc42 Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Wed, 13 May 2015 17:46:03 +0200
Subject: full feature set
---
model/cdec.ini | 28 ----------
model/dtrain.ini | 4 --
model/grammar.gz | Bin 153 -> 0 bytes
model/nc-wmt11.en.srilm.gz | Bin 16017291 -> 0 bytes
model/run | 11 ----
model/src.gz | Bin 128 -> 0 bytes
model/weights.init | 12 -----
run_server | 3 +-
server.rb | 132 +++++++++++++++++++++++++++++++++++----------
9 files changed, 106 insertions(+), 84 deletions(-)
delete mode 100644 model/cdec.ini
delete mode 100644 model/dtrain.ini
delete mode 100644 model/grammar.gz
delete mode 100644 model/nc-wmt11.en.srilm.gz
delete mode 100755 model/run
delete mode 100644 model/src.gz
delete mode 100644 model/weights.init
mode change 100644 => 100755 server.rb
diff --git a/model/cdec.ini b/model/cdec.ini
deleted file mode 100644
index 36f53ac..0000000
--- a/model/cdec.ini
+++ /dev/null
@@ -1,28 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-grammar=grammar.gz
-feature_function=WordPenalty
-feature_function=KLanguageModel nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleSourceBigramFeatures
-feature_function=RuleTargetBigramFeatures
-feature_function=RuleShape
-#feature_function=LexicalFeatures 1 1 1
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
-weights=weights.init
diff --git a/model/dtrain.ini b/model/dtrain.ini
deleted file mode 100644
index b6d29bb..0000000
--- a/model/dtrain.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-decoder_conf=./cdec.ini # config for cdec
-k=100 # use 100best lists
-N=4 # optimize (approx.) BLEU4
-margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
deleted file mode 100644
index 8c5f570..0000000
Binary files a/model/grammar.gz and /dev/null differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
deleted file mode 100644
index 7ce8105..0000000
Binary files a/model/nc-wmt11.en.srilm.gz and /dev/null differ
diff --git a/model/run b/model/run
deleted file mode 100755
index 2ab2498..0000000
--- a/model/run
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash -x
-
-export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
-
-while true; do
-/fast_scratch/simianer/lfpe/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc &>dtrain.out &
-PID=$!
-sleep 600
-kill $PID
-done
-
diff --git a/model/src.gz b/model/src.gz
deleted file mode 100644
index 3f8f895..0000000
Binary files a/model/src.gz and /dev/null differ
diff --git a/model/weights.init b/model/weights.init
deleted file mode 100644
index 0d09f9f..0000000
--- a/model/weights.init
+++ /dev/null
@@ -1,12 +0,0 @@
-CountEF 0.1
-EgivenFCoherent -0.1
-Glue 0.01
-IsSingletonF -0.01
-IsSingletonFE -0.01
-LanguageModel 0.1
-LanguageModel_OOV -1
-MaxLexFgivenE -0.1
-MaxLexEgivenF -0.1
-PassThrough -0.1
-SampleCountF -0.1
-WordPenalty -0.1
diff --git a/run_server b/run_server
index 5747499..39c92ae 100755
--- a/run_server
+++ b/run_server
@@ -1,5 +1,6 @@
#!/bin/bash -x
export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
-ruby server.rb &>server.rb.out
+export PYTHONPATH=~/.local/lib/python2.7/site-packages
+ruby server2.rb ../example/conf.rb #&>server2.rb.out
diff --git a/server.rb b/server.rb
old mode 100644
new mode 100755
index def401e..922f103
--- a/server.rb
+++ b/server.rb
@@ -4,23 +4,57 @@ require 'sinatra'
require 'sinatra/cross_origin'
require 'nanomsg'
require 'zipf'
+require 'digest'
-set :bind, '147.142.207.52'
-set :port, 60666
+require_relative "#{ARGV[0]}" # loads the Ruby file named by the first command-line argument; the upper-case constants used below are not defined in this file, presumably they come from it -- confirm
+INPUT = ReadFile.readlines INPUT_FILE # ReadFile is not defined here (presumably provided by the 'zipf' require -- verify)
+`mkdir -p #{WORK_DIR}/g` # shells out to create WORK_DIR/g, the directory the /next handler builds per-source grammar paths under
+def start_daemon cmd, name, addr
+ STDERR.write "> starting #{name} daemon\n"
+ cmd.gsub! '__ADDR__', addr
+ pid = fork do
+ exec cmd
+ end
+ sock = NanoMsg::PairSocket.new
+ sock.connect addr
+ STDERR.write ">> got #{sock.recv} from #{name}\n"
+
+ return sock, pid
+end
+
+def stop_all_daemons env
+ STDERR.write "shutting down all daemons\n"
+ env.each { |name,p|
+ p[:socket].send "shutdown"
+ STDERR.write ">> #{name} is #{p[:socket].recv}"
+ }
+end
+
+daemons = { # daemon name => shell command template; '__ADDR__' is replaced with the socket address by start_daemon
+ :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
+ :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params --sock_url '__ADDR__'",
+ :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params --sock_url '__ADDR__'",
+ :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
+ :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'"
+}
+
+env = {} # daemon name => { :socket, :pid }, filled in by the loop below
+port = BEGIN_PORT_RANGE # port for the first daemon; incremented by one per daemon
+daemons.each { |name,cmd|
+ sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}" # start_daemon waits on sock.recv before returning, so daemons start one at a time
+ env[name] = { :socket => sock, :pid => pid }
+ port += 1
+}
+
+set :bind, SERVER_IP # replaces the previously hard-coded address; SERVER_IP is not defined in this file (presumably from the config loaded via ARGV[0] -- confirm)
+set :port, WEB_PORT # replaces the previously hard-coded port 60666; WEB_PORT is likewise defined elsewhere
set :allow_origin, :any
set :allow_methods, [:get, :post, :options]
set :allow_credentials, true
set :max_age, "1728000"
set :expose_headers, ['Content-Type']
-sock = NanoMsg::PairSocket.new
-addr = "ipc:///tmp/dtrain.ipc"
-sock.bind addr
-
-input = ReadFile.readlines_strip "model/src.gz"
-input_ = Array.new input
-
get '/' do
cross_origin
"Nothing to see here."
@@ -29,29 +63,71 @@ end
get '/next' do
cross_origin
if params[:example]
- sock.send params[:example].strip
- puts params.to_s
- sock.recv # dummy
+ source, reference = params[:example].strip.split(" ||| ")
+ # update weights
+ grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar"
+ annotated_source = "