summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--[-rwxr-xr-x].htaccess0
-rw-r--r--README.md25
-rw-r--r--[l---------]lfpe.conf9
-rw-r--r--model/cdec.ini28
-rw-r--r--model/dtrain.ini4
-rw-r--r--model/grammar.gzbin153 -> 0 bytes
-rw-r--r--model/nc-wmt11.en.srilm.gzbin16017291 -> 0 bytes
-rwxr-xr-xmodel/run11
-rw-r--r--model/src.gzbin128 -> 0 bytes
-rw-r--r--model/weights.init12
-rwxr-xr-xrun_server3
-rw-r--r--server.php59
-rwxr-xr-x[-rw-r--r--]server.rb132
13 files changed, 114 insertions, 169 deletions
diff --git a/.htaccess b/.htaccess
index 76a4aff..76a4aff 100755..100644
--- a/.htaccess
+++ b/.htaccess
diff --git a/README.md b/README.md
index 105f788..5c1cff1 100644
--- a/README.md
+++ b/README.md
@@ -1,27 +1,2 @@
# lfpe
-# TODO
-
-## Interface
-* dynamic enlargement of textarea (fix size)
-* alignment visualization
-* add interface to modify alignments
-* enable dynamic updating of visualization
-* [implement the view of a 'window' of translations with dynamic updating]
-* [implement document view, allow to choose specific segments]
-* session stats view
-
-## Backend
-* implement Denkowski's grammar updating
-
-## Interplay
-* user registration in PHP with database and user directory
-* multi-seat (start decoder/learner with associated models)
-* allow multiple servers
-* session management (stop decoders/learners, save models, pause/resume)
-* collect session statistics
-
-## Misc.
-* manual for students
-* build large-scale de-en patent SMT model [Patrick]
-
diff --git a/lfpe.conf b/lfpe.conf
index 98fd0fb..622fe31 120000..100644
--- a/lfpe.conf
+++ b/lfpe.conf
@@ -1 +1,8 @@
-/etc/apache2/conf.d/lfpe.conf \ No newline at end of file
+Alias /lfpe /fast_scratch/simianer/lfpe/lfpe/
+
+<Directory /fast_scratch/simianer/lfpe/lfpe>
+ Options +FollowSymLinks
+ AllowOverride All
+ order allow,deny
+ allow from all
+</Directory>
diff --git a/model/cdec.ini b/model/cdec.ini
deleted file mode 100644
index 36f53ac..0000000
--- a/model/cdec.ini
+++ /dev/null
@@ -1,28 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-grammar=grammar.gz
-feature_function=WordPenalty
-feature_function=KLanguageModel nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleSourceBigramFeatures
-feature_function=RuleTargetBigramFeatures
-feature_function=RuleShape
-#feature_function=LexicalFeatures 1 1 1
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
-weights=weights.init
diff --git a/model/dtrain.ini b/model/dtrain.ini
deleted file mode 100644
index b6d29bb..0000000
--- a/model/dtrain.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-decoder_conf=./cdec.ini # config for cdec
-k=100 # use 100best lists
-N=4 # optimize (approx.) BLEU4
-margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
deleted file mode 100644
index 8c5f570..0000000
--- a/model/grammar.gz
+++ /dev/null
Binary files differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
deleted file mode 100644
index 7ce8105..0000000
--- a/model/nc-wmt11.en.srilm.gz
+++ /dev/null
Binary files differ
diff --git a/model/run b/model/run
deleted file mode 100755
index 2ab2498..0000000
--- a/model/run
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/bash -x
-
-export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
-
-while true; do
-/fast_scratch/simianer/lfpe/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc &>dtrain.out &
-PID=$!
-sleep 600
-kill $PID
-done
-
diff --git a/model/src.gz b/model/src.gz
deleted file mode 100644
index 3f8f895..0000000
--- a/model/src.gz
+++ /dev/null
Binary files differ
diff --git a/model/weights.init b/model/weights.init
deleted file mode 100644
index 0d09f9f..0000000
--- a/model/weights.init
+++ /dev/null
@@ -1,12 +0,0 @@
-CountEF 0.1
-EgivenFCoherent -0.1
-Glue 0.01
-IsSingletonF -0.01
-IsSingletonFE -0.01
-LanguageModel 0.1
-LanguageModel_OOV -1
-MaxLexFgivenE -0.1
-MaxLexEgivenF -0.1
-PassThrough -0.1
-SampleCountF -0.1
-WordPenalty -0.1
diff --git a/run_server b/run_server
index 5747499..97bae1a 100755
--- a/run_server
+++ b/run_server
@@ -1,5 +1,6 @@
#!/bin/bash -x
export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib
-ruby server.rb &>server.rb.out
+export PYTHONPATH=~/.local/lib/python2.7/site-packages
+ruby server.rb ../example/conf.rb &>server.rb.out
diff --git a/server.php b/server.php
deleted file mode 100644
index 0683f12..0000000
--- a/server.php
+++ /dev/null
@@ -1,59 +0,0 @@
-<?php
-
-if($_POST['number'])
-{
-$varNumber = $_POST['number'];
-
-$mySourceFile = fopen("source.txt", "r") or die("Unable to open file!");
-
-for ($i = 1; $i <= $varNumber; $i++) {
- fgets($mySourceFile);
-}
-
-if (!feof($mySourceFile))
- {
- echo fgets($mySourceFile);
-
- }
- else
- {
- echo "The end of the input file.";
- }
-fclose($mySourceFile);
-}
-
-
-if($_POST['number_trgt'])
-{
-$varNumber = $_POST['number_trgt'];
-
-$myTargetFile = fopen("target.txt", "r") or die("Unable to open file!");
-
-for ($i = 1; $i <= $varNumber; $i++) {
- fgets($myTargetFile);
-}
-
-if (!feof($myTargetFile))
- {
- echo fgets($myTargetFile);
- }
- else
- {
- echo "The end of the input file.";
- }
-fclose($myTargetFile);
-}
-
-
-if($_POST['postedit'])
-{
-$varData = $_POST['postedit'];
-echo $varData;
-
-$myFile = "testFile.txt";
-file_put_contents($myFile, $varData, FILE_APPEND);
-file_put_contents($myFile, PHP_EOL, FILE_APPEND);
-
-}
-
-?>
diff --git a/server.rb b/server.rb
index def401e..2112638 100644..100755
--- a/server.rb
+++ b/server.rb
@@ -4,23 +4,57 @@ require 'sinatra'
require 'sinatra/cross_origin'
require 'nanomsg'
require 'zipf'
+require 'digest'
-set :bind, '147.142.207.52'
-set :port, 60666
+require_relative "#{ARGV[0]}"
+INPUT = ReadFile.readlines INPUT_FILE
+`mkdir -p #{WORK_DIR}/g`
+def start_daemon cmd, name, addr
+ STDERR.write "> starting #{name} daemon\n"
+ cmd.gsub! '__ADDR__', addr
+ pid = fork do
+ exec cmd
+ end
+ sock = NanoMsg::PairSocket.new
+ sock.connect addr
+ STDERR.write "< got #{sock.recv} from #{name}\n"
+
+ return sock, pid
+end
+
+def stop_all_daemons env
+ STDERR.write "shutting down all daemons\n"
+ env.each { |name,p|
+ p[:socket].send "shutdown"
+ STDERR.write "< #{name} is #{p[:socket].recv}\n"
+ }
+end
+
+daemons = {
+ :extractor => "python -m cdec.sa.extract -c #{DATA_DIR}/sa.ini --online -u -S '__ADDR__'",
+ :aligner_fwd => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/forward.params -m #{FWD_MEAN_SRCLEN_MULT} -T #{FWD_TENSION} --sock_url '__ADDR__'",
+ :aligner_back => "#{CDEC_NET}/word-aligner/net_fa -f #{DATA_DIR}/a/backward.params -m #{BACK_MEAN_SRCLEN_MULT} -T #{BACK_TENSION} --sock_url '__ADDR__'",
+ :atools => "#{CDEC_NET}/utils/atools_net -c grow-diag-final-and -S '__ADDR__'",
+ :dtrain => "#{CDEC_NET}/training/dtrain/dtrain_net_interface -c #{DATA_DIR}/dtrain.ini -o #{WORK_DIR}/weights.final -a '__ADDR__'" ##{DTRAIN_EXTRA}"
+}
+
+env = {}
+port = BEGIN_PORT_RANGE
+daemons.each { |name,cmd|
+ sock, pid = start_daemon cmd, name, "tcp://127.0.0.1:#{port}"
+ env[name] = { :socket => sock, :pid => pid }
+ port += 1
+}
+
+set :bind, SERVER_IP
+set :port, WEB_PORT
set :allow_origin, :any
set :allow_methods, [:get, :post, :options]
set :allow_credentials, true
set :max_age, "1728000"
set :expose_headers, ['Content-Type']
-sock = NanoMsg::PairSocket.new
-addr = "ipc:///tmp/dtrain.ipc"
-sock.bind addr
-
-input = ReadFile.readlines_strip "model/src.gz"
-input_ = Array.new input
-
get '/' do
cross_origin
"Nothing to see here."
@@ -29,29 +63,71 @@ end
get '/next' do
cross_origin
if params[:example]
- sock.send params[:example].strip
- puts params.to_s
- sock.recv # dummy
+ source, reference = params[:example].strip.split(" ||| ")
+ # update weights
+ grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar"
+ annotated_source = "<seg grammar=\"#{grammar}\"> #{source} </seg>"
+ msg = "#{annotated_source} ||| #{reference}"
+ STDERR.write "[dtrain] > sending '#{msg}' for update\n"
+ env[:dtrain][:socket].send msg
+ STDERR.write "[dtrain] waiting for confirmation ...\n"
+ STDERR.write "[dtrain] < says it's #{env[:dtrain][:socket].recv}\n"
+ # update grammar extractor
+ # get forward alignment
+ msg = "#{source} ||| #{reference}"
+ STDERR.write "[aligner_fwd] > sending '#{msg}' for forced alignment\n"
+ env[:aligner_fwd][:socket].send msg
+ STDERR.write "[aligner_fwd] waiting for alignment ...\n"
+ a_fwd = env[:aligner_fwd][:socket].recv.strip
+ STDERR.write "[aligner_fwd] < got alignment: '#{a_fwd}'\n"
+ # get backward alignment
+ msg = "#{source} ||| #{reference}"
+ STDERR.write "[aligner_back] > sending '#{msg}' for forced alignment\n"
+ env[:aligner_back][:socket].send msg
+ STDERR.write "[aligner_back] waiting for alignment ...\n"
+ a_back = env[:aligner_back][:socket].recv.strip
+ STDERR.write "[aligner_back] < got alignment: '#{a_back}'\n"
+ # combine alignments
+ msg = "#{a_fwd} ||| #{a_back}"
+ STDERR.write "[atools] > sending '#{msg}' to combine alignments\n"
+ env[:atools][:socket].send msg
+ STDERR.write "[atools] waiting for alignment ...\n"
+ a = env[:atools][:socket].recv.strip
+ STDERR.write "[atools] < got alignment '#{a}'\n"
+ # actual extractor
+ msg = "TEST ||| #{source} ||| #{reference} ||| #{a}"
+ STDERR.write "[extractor] > sending '#{msg}' for learning\n"
+ env[:extractor][:socket].send "TEST ||| #{source} ||| #{reference} ||| #{a}"
+ STDERR.write "[extractor] waiting for confirmation ...\n"
+ STDERR.write "[extractor] < got '#{env[:extractor][:socket].recv}'\n"
end
- src = input.shift
- if !src
- puts "end of input, sending 'fi'"
+ source = INPUT.shift
+ if !source # input is done -> displays 'Thank you!'
+ STDERR.write ">>> end of input, sending 'fi'\n"
"fi"
- else
- puts "sending source '#{src}' ..."
- sock.send "act:translate ||| #{src}"
- puts "done"
- sleep 1
- puts "waiting for translation ..."
- t = sock.recv
- puts "got translation '#{t}'"
- "#{src}\t#{t}"
+ else # translate next sentence
+ source.strip!
+ # generate grammar for current sentence
+ grammar = "#{WORK_DIR}/g/#{Digest::SHA256.hexdigest(source)}.grammar" # FIXME: keep grammars?
+ msg = "- ||| #{source} ||| #{grammar}" # FIXME: content identifier useful?
+ STDERR.write "[extractor] > asking to generate grammar: '#{msg}'\n"
+ env[:extractor][:socket].send msg
+ STDERR.write "[extractor] waiting for confirmation ...\n"
+ STDERR.write "[extractor] < says it generated #{env[:extractor][:socket].recv.strip}\n"
+ # translation
+ msg = "act:translate ||| <seg grammar=\"#{grammar}\"> #{source} </seg>"
+ STDERR.write "[dtrain] > asking to translate: '#{msg}'\n"
+ env[:dtrain][:socket].send msg
+ STDERR.write "[dtrain] waiting for translation ...\n"
+ transl = env[:dtrain][:socket].recv.encode "UTF-8"
+ STDERR.write "[dtrain] < received translation: '#{transl}'\n"
+ "#{source}\t#{transl}"
end
end
-get '/reset' do
- cross_origin
- input = Array.new input_
- "done"
+# stop daemons and shut down server
+get '/shutdown' do
+ stop_all_daemons env
+ exit
end