From 86959b66e35fdf0d8ed059271029d8b44a8c7608 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Thu, 10 Mar 2016 15:52:35 +0100
Subject: server.rb: pregenerated grammars
---
server.rb | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/server.rb b/server.rb
index 5fa291a..5a95131 100755
--- a/server.rb
+++ b/server.rb
@@ -27,6 +27,9 @@ if !FileTest.exist? LOCK_FILE # locked?
$env = {} # environment variables (socket connections to daemons)
end
$status = "Idle" # current server status
+$pregenerated_grammars = true # FIXME config
+$oov_corrected = {}
+$oov_corrected.default = false
# #############################################################################
# Daemons
@@ -39,10 +42,10 @@ $daemons = {
:detokenizer_src => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a detokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",
:truecaser => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a truecase -S '__ADDR__' -e #{EXTERNAL} -t #{SESSION_DIR}/truecase.model",
:dtrain => "#{CDEC}/training/dtrain/dtrain_net_interface -c #{SESSION_DIR}/dtrain.ini -d #{WORK_DIR}/dtrain.debug.json -o #{WORK_DIR}/weights -a '__ADDR__' -E -R",
- :extractor => "python -m cdec.sa.extract -c #{SESSION_DIR}/extract.ini --online -u -S '__ADDR__'",
- :aligner_fwd => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
- :aligner_back => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
- :atools => "#{CDEC}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'"
+ #:extractor => "python -m cdec.sa.extract -c #{SESSION_DIR}/extract.ini --online -u -S '__ADDR__'",
+ #:aligner_fwd => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
+ #:aligner_back => "#{CDEC}/word-aligner/net_fa -f #{SESSION_DIR}/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
+ #:atools => "#{CDEC}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'"
}
# #############################################################################
@@ -181,7 +184,8 @@ def process_next reply
if data["OOV"] # OOV corrections
$status = "Processing OOV corrections"
logmsg :server, "received OOV corrections" # status
- grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
src, tgt = splitpipe(data["correct"]) # format:src1\tsrc2\tsrc..|||tgt1\t..
tgt = clean_str tgt
src = src.split("\t").map { |i| URI.decode(i).strip }
@@ -195,6 +199,7 @@ def process_next reply
$new_rules << r
}
$confirmed = true
+ $oov_corrected[$db['progress']] = true
end
# received post-edit -> update models
# 0. save raw post-edit
@@ -234,7 +239,8 @@ def process_next reply
data["source_raw"].each { |i| f << URI.decode(i) }
# 2.5 new rule extraction
$status = "Extracting rules from post edit" # status
- grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
current_grammar_ids = {}
ReadFile.readlines_strip(grammar).each { |r|
s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
@@ -311,11 +317,13 @@ def process_next reply
logmsg :server, "updating ..."
# 4. update weights
# N.b.: this uses unaltered grammar [no new rules]
- grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
annotated_source = "