From 96d954177a95fe69ebebf8ebdbab63434bf4c600 Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Tue, 3 May 2016 12:59:19 +0200 Subject: not using online grammar extractor --- views/debug.haml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/views/debug.haml b/views/debug.haml index 4ebb0a0..1bc0fd1 100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -43,10 +43,13 @@ %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights %li %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates + / + %li + %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor %li - %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor + %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars %li - %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules + %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules / %li %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown @@ -299,9 +302,9 @@ %tr %td Shape_* %td.left Indicator features for rule shapes (39 in total) - %tr - %td IsSupportedOnline - %td.left Rules with support from local context (added by Denkowski's online suffix array extractor) + /=%tr + /= %td IsSupportedOnline + /= %td.left Rules with support from local context (added by Denkowski's online suffix array extractor) %p.up %a{ :href => "#" } ^ up -- cgit v1.2.3 From 54bc620c309d471989824d5de50915728b2f3afc Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Tue, 3 May 2016 14:22:07 +0200
Subject: mv
---
.htaccess | 2 +-
README.md | 25 ++++++++++++++++++++-----
external/lfpe-apache | 8 ++++----
inc/db.inc.php | 2 +-
js/interface.js | 5 +++--
static/pattr-abstracts.html | 10 +++++-----
util/run_server | 22 ++++++++++++----------
7 files changed, 46 insertions(+), 28 deletions(-)
diff --git a/.htaccess b/.htaccess
index a2cb1b7..cf61c47 100644
--- a/.htaccess
+++ b/.htaccess
@@ -1,5 +1,5 @@
AuthName "Post-Editing Interface"
AuthType Basic
-AuthUserFile /fast_scratch/simianer/lfpe/.htpasswd
+AuthUserFile /srv/postedit/.htpasswd
require valid-user
diff --git a/README.md b/README.md
index 403d745..a3d4e96 100644
--- a/README.md
+++ b/README.md
@@ -2,20 +2,35 @@
Post-editing interface for learning from post-edited machine translations.
# Setup
+
+`
+ export BASE_DIR=/srv/postedit
+`
+
## nanomsg lib
- export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/lib/nanomsg-0.5-beta/lib
+`
+ export LD_LIBRARY_PATH=$BASE_DIR/lib/nanomsg-0.5-beta/lib
+`
## ruby
- [see $(pwd)/lib/ruby/gems/nanomsg-0.4.0/ext/extconf.rb]
- gem install nanomsg -i $(pwd)/lib/ruby
- export GEM_PATH=/fast_scratch/simianer/lfpe/lib/ruby/:$GEM_PATH
+`
+ [see $BASE_DIR/lib/ruby/gems/nanomsg-0.4.0/ext/extconf.rb]
+ gem install nanomsg -i $BASE_DIR/lib/ruby
+ export GEM_PATH=$BASE_DIR/lib/ruby/:$GEM_PATH
+`
## iptables
+`
iptables -A INPUT -i eth0 -p tcp -m multiport --dports 50000:50100 -j ACCEPT
+`
## apache
+`
ln -s /etc/apache2/sites-available/lfpe /etc/apache2/sites-enabled/020-lfpe
+`
## python
- export PYTHONPATH=/fast_scratch/simianer/lfpe/lib/python:$PYTHONPATH
+`
+ export PYTHONPATH=$BASE_DIR/lib/python:$PYTHONPATH
+`
diff --git a/external/lfpe-apache b/external/lfpe-apache
index e4de4be..eec5e5c 100644
--- a/external/lfpe-apache
+++ b/external/lfpe-apache
@@ -3,13 +3,13 @@
ServerAdmin simianer@cl.uni-heidelberg.de
- DocumentRoot /fast_scratch/simianer/lfpe/lfpe
+ DocumentRoot /srv/postedit/lfpe
- ErrorLog /fast_scratch/simianer/lfpe/lfpe/logs/apache2.error.log
+ ErrorLog /srv/postedit/lfpe/logs/apache2.error.log
LogLevel warn
- CustomLog /fast_scratch/simianer/lfpe/lfpe/logs/apache2.access.log combined
+ CustomLog /srv/postedit/lfpe/logs/apache2.access.log combined
-
Date: Tue, 3 May 2016 14:22:20 +0200
Subject: parameters
---
phrase2_extraction/phrase2_extraction.rb | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 48dfd73..253df1b 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -5,9 +5,9 @@ require 'zipf'
module PhrasePhraseExtraction
DEBUG = false
-MAX_NT = 2 # Chiang: 2
-MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words
-MAX_SRC_SZ = 3 # Chiang: 5 words
+MAX_NT = 1 # Chiang: 2
+MAX_SEED_NUM_WORDS = 10 # Chiang: 10 words
+MAX_SRC_SZ = 5 # Chiang: 5 words
FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
class Rule
--
cgit v1.2.3
From 7829b5c7a6f82121b013d819aad3e8239cd2596f Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 3 May 2016 14:22:32 +0200
Subject: fixes
---
server.rb | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/server.rb b/server.rb
index 5a95131..9ca43b1 100755
--- a/server.rb
+++ b/server.rb
@@ -34,7 +34,7 @@ $oov_corrected.default = false
# #############################################################################
# Daemons
# #############################################################################
-DIR="/fast_scratch/simianer/lfpe"
+DIR="/srv/postedit"
$daemons = {
:tokenizer => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
:tokenizer_src => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",
@@ -123,6 +123,8 @@ def init
$env[name] = { :socket => sock, :pid => pid }
port += 1
}
+
+ send_recv :truecaser, "lOaD iT"
# lock file
`touch #{LOCK_FILE}`
$status = "Initialized" # status
@@ -406,7 +408,7 @@ def process_next reply
end
# - known rules
logmsg :server, "annotating known rules"
- $status = "Adding rules to grammar" # status
+ $status = "Adding rules to the grammar" # status
match = {}
$known_rules.each { |r|
_,src,tgt,_,_ = splitpipe r
@@ -423,11 +425,16 @@ def process_next reply
}
WriteFile.new(grammar).write all_rules.join("\n")+"\n"
# - additional rules
- $new_rules.each { |rule|
- logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
- s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- `echo "#{rule}" >> #{grammar}`
- }
+ logmsg :server, $new_rules.to_s
+ if $new_rules.size > 0
+ s = $new_rules.join "\n"
+ `echo "#{s}" >> #{grammar}`
+ end
+ #$new_rules.each { |rule|
+ # logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
+ # s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ # `echo "#{rule}" >> #{grammar}`
+ #}
# 2. check for OOVs
if !$oov_corrected[$db['progress']]
$status = "Checking for OOVs" # status
@@ -664,11 +671,21 @@ get '/reset_extractor' do # reset grammar extractor
return "reset extractor: done"
end
+get '/reset_grammars' do # restore grammars from the session's g/original copies
+ logmsg :server, "reset grammars"
+ return "locked" if $lock
+ `cp #{SESSION_DIR}/g/original/* #{SESSION_DIR}/g/`
+ $last_reply = nil
+
+ return "reset grammars: done"
+end
+
get '/reset_new_rules' do # removed learned rules
$new_rules.clear
$known_rules.clear
`rm #{WORK_DIR}/*.*_rules`
`rm #{WORK_DIR}/g/*`
+ $last_reply = nil
return "reset new rules: done"
end
--
cgit v1.2.3
From 96d6f8a3fc83e075f8054d48ea8c6973ca534b65 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 6 May 2016 11:14:48 +0200
Subject: fix
---
util/run_beta_test | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/util/run_beta_test b/util/run_beta_test
index a0fe20f..9180b52 100755
--- a/util/run_beta_test
+++ b/util/run_beta_test
@@ -1,8 +1,8 @@
#!/bin/zsh -x
-cd /fast_scratch/simianer/lfpe/lfpe/util
+cd /srv/postedit/lfpe/util
./kill; ./kill; ./kill;
-for i in ../../sessions/product_de-en_beta_test_*; do
+for i in `ls -1 ../../sessions/ | grep -v "_1_" | grep -v data | grep -v toy`; do
echo $i
echo $(basename $i)
./run_server $(basename $i) &; sleep 600;
--
cgit v1.2.3
From 6bd7135e6039b0682f49234e42451077413f0bd9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 10 May 2016 10:49:09 +0200
Subject: count clicks and keystrokes, fix bug in rule addition, good params
and improvement for phrase2 extraction
---
interface.php | 5 ++--
js/interface.js | 47 +++++++++++++++++++++++++++++++-
phrase2_extraction/phrase2_extraction.rb | 38 +++++++++++++++++++++++---
server.rb | 10 ++++---
views/debug.haml | 2 ++
5 files changed, 91 insertions(+), 11 deletions(-)
diff --git a/interface.php b/interface.php
index 8df7fd0..46b07f0 100644
--- a/interface.php
+++ b/interface.php
@@ -33,7 +33,7 @@
EP-0005734-A1 (H01H)
+EP-0005734-A1 (H01H)
An electromagnetically operated switchgear, e.g. an electrical contactor, has a magnetic iron core which consists of two identical E- shaped magnet parts, specifically the magnet core (8), carrying the winding, and the armature (7).
@@ -38,7 +38,7 @@ div.ex:hover {
-EP-0003301-A1 (A01N,C07C)
+EP-0003301-A1 (A01N,C07C)
They have strong insecticidal, acaricidal and nematicidal properties.
@@ -50,7 +50,7 @@ div.ex:hover {
-EP-0003578-A2 (F25B)
+EP-0003578-A2 (F25B)
The invention relates to the refrigerant circuit (1) of a heat pump.
@@ -78,7 +78,7 @@ div.ex:hover {
-EP-0002017-A1 (C25B)
+EP-0002017-A1 (C25B)
Anodes for electrochemical purposes are composed of an electrically conducting support body and a layer of metallic silicon and/or germanium applied thereto.
@@ -94,7 +94,7 @@ div.ex:hover {
- EP-0018427-A1 (G05B)
+ EP-0018427-A1 (G05B)
Electrical control circuit comprising a signal generator section (20) which generates successive uniform pulses in dependence on a control signal.
diff --git a/util/run_server b/util/run_server
index 7d45583..a4b7a6c 100755
--- a/util/run_server
+++ b/util/run_server
@@ -1,15 +1,17 @@
#!/bin/bash -x
-export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/lib/nanomsg-0.5-beta/lib:$LD_LIBRARY_PATH
-export PYTHONPATH=/fast_scratch/simianer/lfpe/lib/python:$PYTHONPATH
-export GEM_PATH=/fast_scratch/simianer/lfpe/lib/ruby/:$GEM_PATH
-UTIL=/fast_scratch/simianer/lfpe/lfpe/util
+BASE_DIR=/srv/postedit
+export LD_LIBRARY_PATH=$BASE_DIR/lib/nanomsg-0.5-beta/lib:$LD_LIBRARY_PATH
+export PYTHONPATH=$BASE_DIR/lib/python:$PYTHONPATH
+export GEM_PATH=$BASE_DIR/lib/ruby/:$GEM_PATH
+UTIL=$BASE_DIR/lfpe/util
SESSION=$1
-DIR=/fast_scratch/simianer/lfpe/sessions/$SESSION
+SESSION_DIR=$BASE_DIR/sessions/$SESSION
-rm $DIR/work/lockfile
-rm -r $DIR/work/
-mkdir -p $DIR/work
-cp $DIR/data.json.original $DIR/data.json
-$UTIL/../server.rb $DIR/conf.rb &>$DIR/work/session.out
+rm $SESSION_DIR/work/lockfile
+rm -r $SESSION_DIR/work/
+mkdir -p $SESSION_DIR/work
+cp $SESSION_DIR/data.json.original $SESSION_DIR/data.json
+cp $SESSION_DIR/g/original/* $SESSION_DIR/g/
+$UTIL/../server.rb $SESSION_DIR/conf.rb &>$SESSION_DIR/work/session.out
--
cgit v1.2.3
From b74ed595f149986474931f9328f84348a5652730 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
@@ -52,7 +52,7 @@ Note that the source word may be distorted.
Target:
-
+