summary refs log tree commit diff
path: root/server.rb
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-10-30 17:10:08 +0100
committer Patrick Simianer <p@simianer.de> 2015-10-30 17:10:08 +0100
commit 126469db91682e0fe41a668c5f8f341053f81dcf (patch)
tree 3760b7e2775a4b683b0440befc8fccd99e6b7339 /server.rb
parent 0f5a2e0effc117985e5fe4b94e9c60af4a519a15 (diff)
source-side detokenization for source groups
Diffstat (limited to 'server.rb')
-rwxr-xr-x server.rb 25
1 files changed, 14 insertions, 11 deletions
diff --git a/server.rb b/server.rb
index a404b6c..0b39c1d 100755
--- a/server.rb
+++ b/server.rb
@@ -27,15 +27,16 @@ end
# #############################################################################
DIR="/fast_scratch/simianer/lfpe"
$daemons = {
- :tokenizer => "#{DIR}/lfpe/util/wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
- :detokenizer => "#{DIR}/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
- :truecaser => "#{DIR}/lfpe/util/wrapper.rb -a truecase -S '__ADDR__' -e #{EXTERNAL} -t #{SESSION_DIR}/truecase.model",
+ :tokenizer => "#{DIR}/lfpe/util/wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
+ :detokenizer => "#{DIR}/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
+ :detokenizer_src => "#{DIR}/lfpe/util/wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",
+ :truecaser => "#{DIR}/lfpe/util/wrapper.rb -a truecase -S '__ADDR__' -e #{EXTERNAL} -t #{SESSION_DIR}/truecase.model",
#:lowercaser => "#{DIR}/lfpe/util/wrapper.rb -a lowercase -S '__ADDR__' -e #{EXTERNAL}",
- :dtrain => "#{CDEC}/training/dtrain/dtrain_net_interface -c #{SESSION_DIR}/dtrain.ini -d #{WORK_DIR}/dtrain.debug.json -o #{WORK_DIR}/weights -a '__ADDR__' -E",
- :extractor => "python -m cdec.sa.extract -c #{SESSION_DIR}/sa.ini --online -u -S '__ADDR__'",
- :aligner_fwd => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
- :aligner_back => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
- :atools => "#{CDEC}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'"
+ :dtrain => "#{CDEC}/training/dtrain/dtrain_net_interface -c #{SESSION_DIR}/dtrain.ini -d #{WORK_DIR}/dtrain.debug.json -o #{WORK_DIR}/weights -a '__ADDR__' -E",
+ :extractor => "python -m cdec.sa.extract -c #{SESSION_DIR}/sa.ini --online -u -S '__ADDR__'",
+ :aligner_fwd => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
+ :aligner_back => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
+ :atools => "#{CDEC}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'"
}
# #############################################################################
@@ -292,9 +293,11 @@ get '/next' do # (receive post-edit, update models), send next translation
obj["progress"]= $db['progress']
obj["raw_source"] = raw_source
w_idx = 0
- obj["source_groups"].each_index { |j|
- a = obj["source_groups"][j].split
- a.each_with_index
+ obj["source_groups"][0][0] = obj["source_groups"][0][0].upcase
+ obj["source_groups"].each_with_index { |i,j|
+ prev = obj["source_groups"][j][0]
+ obj["source_groups"][j] = send_recv(:detokenizer_src, obj["source_groups"][j]).strip
+ obj["source_groups"][j][0]=prev if j > 0
}
# save
$db["mt_raw"] = obj["transl"]