summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2016-05-10 10:59:49 +0200
committer Patrick Simianer <p@simianer.de> 2016-05-10 10:59:49 +0200
commit 8dd7a811559deafbe9262f11be2d592617b030a1 (patch)
tree e30c76fbbea82f89dd10e56c35e70ec357bb24b4
parent 189698c044eb3362e65d213c35f425694eba9f27 (diff)
parent 6bd7135e6039b0682f49234e42451077413f0bd9 (diff)
Merge branch 'master' of github.com:pks/lfpe
-rw-r--r-- .htaccess 2
-rw-r--r-- README.md 25
-rw-r--r-- external/lfpe-apache 8
-rw-r--r-- inc/db.inc.php 2
-rw-r--r-- interface.php 5
-rw-r--r-- js/interface.js 52
-rwxr-xr-x phrase2_extraction/phrase2_extraction.rb 36
-rwxr-xr-x server.rb 35
-rw-r--r-- static/pattr-abstracts.html 10
-rwxr-xr-x util/run_beta_test 4
-rwxr-xr-x util/run_server 22
-rw-r--r-- views/debug.haml 15
12 files changed, 167 insertions, 49 deletions
diff --git a/.htaccess b/.htaccess
index a2cb1b7..cf61c47 100644
--- a/.htaccess
+++ b/.htaccess
@@ -1,5 +1,5 @@
AuthName "Post-Editing Interface"
AuthType Basic
-AuthUserFile /fast_scratch/simianer/lfpe/.htpasswd
+AuthUserFile /srv/postedit/.htpasswd
require valid-user
diff --git a/README.md b/README.md
index 403d745..a3d4e96 100644
--- a/README.md
+++ b/README.md
@@ -2,20 +2,35 @@
Post-editing interface for learning from post-edited machine translations.
# Setup
+
+`
+ export BASE_DIR=/srv/postedit
+`
+
## nanomsg lib
- export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/lib/nanomsg-0.5-beta/lib
+`
+ export LD_LIBRARY_PATH=$BASE_DIR/lib/nanomsg-0.5-beta/lib
+`
## ruby
- [see $(pwd)/lib/ruby/gems/nanomsg-0.4.0/ext/extconf.rb]
- gem install nanomsg -i $(pwd)/lib/ruby
- export GEM_PATH=/fast_scratch/simianer/lfpe/lib/ruby/:$GEM_PATH
+`
+ [see $BASE_DIR/lib/ruby/gems/nanomsg-0.4.0/ext/extconf.rb]
+ gem install nanomsg -i $BASE_DIR/lib/ruby
+ export GEM_PATH=$BASE_DIR/lib/ruby/:$GEM_PATH
+`
## iptables
+`
iptables -A INPUT -i eth0 -p tcp -m multiport --dports 50000:50100 -j ACCEPT
+`
## apache
+`
ln -s /etc/apache2/sites-available/lfpe /etc/apache2/sites-enabled/020-lfpe
+`
## python
- export PYTHONPATH=/fast_scratch/simianer/lfpe/lib/python:$PYTHONPATH
+`
+ export PYTHONPATH=$BASE_DIR/lib/python:$PYTHONPATH
+`
diff --git a/external/lfpe-apache b/external/lfpe-apache
index e4de4be..eec5e5c 100644
--- a/external/lfpe-apache
+++ b/external/lfpe-apache
@@ -3,13 +3,13 @@
ServerAdmin simianer@cl.uni-heidelberg.de
- DocumentRoot /fast_scratch/simianer/lfpe/lfpe
+ DocumentRoot /srv/postedit/lfpe
- ErrorLog /fast_scratch/simianer/lfpe/lfpe/logs/apache2.error.log
+ ErrorLog /srv/postedit/lfpe/logs/apache2.error.log
LogLevel warn
- CustomLog /fast_scratch/simianer/lfpe/lfpe/logs/apache2.access.log combined
+ CustomLog /srv/postedit/lfpe/logs/apache2.access.log combined
- <Directory /fast_scratch/simianer/lfpe/lfpe>
+ <Directory /srv/postedit/lfpe>
Options +FollowSymLinks
AllowOverride All
order allow,deny
diff --git a/inc/db.inc.php b/inc/db.inc.php
index f08eda5..ed45f7b 100644
--- a/inc/db.inc.php
+++ b/inc/db.inc.php
@@ -1,6 +1,6 @@
<?php
-$SESSION_DIR="/fast_scratch/simianer/lfpe/sessions";
+$SESSION_DIR="/srv/postedit/sessions";
$json = file_get_contents($SESSION_DIR."/".$_GET["key"]."/data.json");
$db = json_decode($json);
diff --git a/interface.php b/interface.php
index 8df7fd0..46b07f0 100644
--- a/interface.php
+++ b/interface.php
@@ -33,7 +33,7 @@
<tr>
<td align="right">Target:</td>
<td>
- <textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="catch_return(event);" disabled></textarea>
+ <textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="TEXT_handle_keypress(event);" disabled></textarea>
</td>
</tr>
</table>
@@ -52,7 +52,7 @@ Note that the source word may be distorted.
<div>
<button id="help_button" class="button" onclick="$('#help').toggle('blind')">Help</button>
<button id="pause_button" class='button' type="button" onclick="pause()">Pause</button>
- <button id="reset_button" class='button' type="button" onclick="DE_init()">Reset</button>
+ <button id="reset_button" class='button' type="button" onclick="reset()">Reset</button>
<button id="next" type="button" class='button' onclick="next();">Start/Continue</button>
<span id="status"><strong>Working: <span id="status_detail">...</span></strong> <img src="static/ajax-loader-large.gif" width="20px" /></span>
</div>
@@ -103,5 +103,6 @@ Note that the source word may be distorted.
<textarea style="display:none" id="ui_type" ><?php echo $_GET["ui_type"]; ?></textarea>
<textarea style="display:none" id="data" ></textarea>
<textarea style="display:none" id="original_svg" ></textarea>
+<textarea style="display:none" id="original_mt" ></textarea>
<!-- /Data -->
diff --git a/js/interface.js b/js/interface.js
index c946e0c..f009641 100644
--- a/js/interface.js
+++ b/js/interface.js
@@ -5,6 +5,9 @@
var data, // global data object
ui_type; // 't' (text) or 'g' (graphical)
+var TEXT_count_click=0,
+ TEXT_count_kbd=0;
+
/*
* cross-site request
*
@@ -108,6 +111,18 @@ var catch_return = function (e)
return false;
}
+var TEXT_handle_keypress = function (e)
+{
+ if (e.keyCode == 13) {
+ e.preventDefault();
+ next();
+ }
+
+ TEXT_count_kbd += 1;
+
+ return false;
+}
+
/*
* working/not working
*
@@ -230,7 +245,7 @@ var next = function ()
working();
// get metadata stored in DOM
- var base_url = "http://coltrane.cl.uni-heidelberg.de";
+ var base_url = "http://lemmy.cl.uni-heidelberg.de";
var port = document.getElementById("port").value;
var key = document.getElementById("key").value;
@@ -259,6 +274,8 @@ var next = function ()
post_edit = $.trim(target_textarea.value);
send_data["post_edit"] = encodeURIComponent(post_edit);
send_data['type'] = 't';
+ send_data["count_click"] = TEXT_count_click;
+ send_data["count_kbd"] = TEXT_count_kbd;
}
send_data["key"] = key;
@@ -334,7 +351,7 @@ var request_and_process_next = function ()
var last_post_edit = document.getElementById("last_post_edit");
// get metadata stored in DOM
- var base_url = "http://coltrane.cl.uni-heidelberg.de";
+ var base_url = "http://lemmy.cl.uni-heidelberg.de";
var port = document.getElementById("port").value;
var key = document.getElementById("key").value;
@@ -456,8 +473,11 @@ var request_and_process_next = function ()
target_textarea.rows = Math.round(translation.length/80+0.5);
//raw_source_textarea.rows = Math.round(raw_source.length/80+0.5);
target_textarea.focus();
+ $("#original_mt").val(target_textarea.value);
target_textarea.selectionStart = 0;
target_textarea.selectionEnd = 0;
+ TEXT_count_click = 0;
+ TEXT_count_kbd = 0;
// remember aux data in DOM
current_seg_id.value = id;
@@ -493,10 +513,35 @@ var init_text_editor = function ()
{
document.getElementById("target_textarea").value = "";
document.getElementById("target_textarea").setAttribute("disabled", "disabled");
+
+ TEXT_count_click = 0;
+ TEXT_count_kbd = 0;
+
+ $("#target_textarea").click(function () {
+ TEXT_count_click += 1;
+ });
return false;
}
+var get_ui_type = function ()
+{
+ return document.getElementById("ui_type").value;
+}
+
+var reset = function ()
+{
+ var ui_type = get_ui_type();
+ if (ui_type == "t") {
+ if (!$("#init").val()) return;
+ TEXT_count_click = 0;
+ TEXT_count_kbd = 0;
+ $("#target_textarea").val($("#original_mt").val());
+ } else if (ui_type == "g") {
+ DE_init()
+ }
+}
+
/*
* init site
*
@@ -514,7 +559,7 @@ $().ready(function()
not_working();
- ui_type = document.getElementById("ui_type").value;
+ ui_type = get_ui_type();
// graphical derivation editor
if (ui_type == "g") {
@@ -525,6 +570,7 @@ $().ready(function()
init_text_editor();
document.getElementById("textboxes").style.display = "block";
}
+
});
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 48dfd73..547e0be 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -6,8 +6,8 @@ module PhrasePhraseExtraction
DEBUG = false
MAX_NT = 2 # Chiang: 2
-MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words
-MAX_SRC_SZ = 3 # Chiang: 5 words
+MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases!
+MAX_SRC_SZ = 10 # Chiang: 5 words, -> words!
FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
class Rule
@@ -51,6 +51,21 @@ class Rule
return src_len
end
+ def len_src_w
+ src_len = 0
+ @source.each { |i|
+ if i.is_a? String
+ src_len += i.split.size #1
+ else
+ i.each { |j|
+ src_len += source_context[j].split.size
+ }
+ end
+ }
+
+ return src_len
+ end
+
def len_tgt
tgt_len = 0
@target.each { |i|
@@ -64,6 +79,21 @@ class Rule
return tgt_len
end
+ def len_tgt_w
+ tgt_len = 0
+ @target.each { |i|
+ if i.is_a? String
+ tgt_len += i.split.size
+ else
+ i.each { |j|
+ tgt_len += target_context[j].split.size
+ }
+ end
+ }
+
+ return tgt_len
+ end
+
def to_s
source_string = ""
@source.each { |i|
@@ -625,7 +655,7 @@ end
def PhrasePhraseExtraction.remove_too_long_src_sides rules
return rules.reject { |r|
- r.len_src > PhrasePhraseExtraction::MAX_SRC_SZ
+ r.len_src_w > PhrasePhraseExtraction::MAX_SRC_SZ
}
end
diff --git a/server.rb b/server.rb
index 5a95131..599fdbd 100755
--- a/server.rb
+++ b/server.rb
@@ -34,7 +34,7 @@ $oov_corrected.default = false
# #############################################################################
# Daemons
# #############################################################################
-DIR="/fast_scratch/simianer/lfpe"
+DIR="/srv/postedit"
$daemons = {
:tokenizer => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
:tokenizer_src => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",
@@ -123,6 +123,8 @@ def init
$env[name] = { :socket => sock, :pid => pid }
port += 1
}
+
+ send_recv :truecaser, "lOaD iT"
# lock file
`touch #{LOCK_FILE}`
$status = "Initialized" # status
@@ -293,6 +295,8 @@ def process_next reply
$db['svg'] << data['svg']
$db['original_svg'] << data['original_svg']
$db['durations'] << data['duration'].to_f
+ $db['count_click'] << data['count_click'].to_i
+ $db['count_kbd'] << data['count_kbd'].to_i
$db['post_edits_display'] << send_recv(:detokenizer, post_edit)
$last_processed_postedit = $db['post_edits_display'].last
# 1. tokenize
@@ -406,7 +410,7 @@ def process_next reply
end
# - known rules
logmsg :server, "annotating known rules"
- $status = "Adding rules to grammar" # status
+ $status = "Adding rules to the grammar" # status
match = {}
$known_rules.each { |r|
_,src,tgt,_,_ = splitpipe r
@@ -421,13 +425,18 @@ def process_next reply
all_rules[j] = ar
end
}
- WriteFile.new(grammar).write all_rules.join("\n")+"\n"
# - additional rules
- $new_rules.each { |rule|
- logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
- s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- `echo "#{rule}" >> #{grammar}`
- }
+ #logmsg :server, $new_rules.to_s
+ if $new_rules.size > 0
+ all_rules += $new_rules
+ #`echo "#{s}" >> #{grammar}`
+ end
+ WriteFile.new(grammar).write all_rules.join("\n")+"\n"
+ #$new_rules.each { |rule|
+ # logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
+ # s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ # `echo "#{rule}" >> #{grammar}`
+ #}
# 2. check for OOVs
if !$oov_corrected[$db['progress']]
$status = "Checking for OOVs" # status
@@ -664,11 +673,21 @@ get '/reset_extractor' do # reset grammar extractor
return "reset extractor: done"
end
+get '/reset_grammars' do # restore grammars from the original copies
+ logmsg :server, "reset grammars"
+ return "locked" if $lock
+ `cp #{SESSION_DIR}/g/original/* #{SESSION_DIR}/g/`
+ $last_reply = nil
+
+ return "reset grammars: done"
+end
+
get '/reset_new_rules' do # removed learned rules
$new_rules.clear
$known_rules.clear
`rm #{WORK_DIR}/*.*_rules`
`rm #{WORK_DIR}/g/*`
+ $last_reply = nil
return "reset new rules: done"
end
diff --git a/static/pattr-abstracts.html b/static/pattr-abstracts.html
index c24598c..09f5779 100644
--- a/static/pattr-abstracts.html
+++ b/static/pattr-abstracts.html
@@ -20,7 +20,7 @@ div.ex:hover {
<h2>Abstracts</h2>
-<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/0">EP-0005734-A1</a> (H01H)</h3>
+<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/0">EP-0005734-A1</a> (H01H)</h3>
<div class="ex">
<strong>Ein elektromagnetisch betätigtes Schaltgerät, z. B. ein elektrisches Schütz, hat einen magnetischen Eisenkern, der aus zwei gleichen E-förmigen Magnetteilen besteht, nämlich dem die Wicklung tragenden Magnetkern (8) und dem Anker (7).</strong><br/><hr/>
<strong>An electromagnetically operated switchgear, e.g. an electrical contactor, has a magnetic iron core which consists of two identical E- shaped magnet parts, specifically the magnet core (8), carrying the winding, and the armature (7).</strong>
@@ -38,7 +38,7 @@ div.ex:hover {
<hr />
-<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/1">EP-0003301-A1</a> (A01N,C07C)</h3>
+<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/1">EP-0003301-A1</a> (A01N,C07C)</h3>
<div class="ex">
<strong>Sie weisen starke insektizide, akarizide und nematizide Eigenschaften auf.</strong><br/><hr/>
<strong>They have strong insecticidal, acaricidal and nematicidal properties.</strong>
@@ -50,7 +50,7 @@ div.ex:hover {
<hr/>
-<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/2">EP-0003578-A2</a> (F25B)</h3>
+<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/2">EP-0003578-A2</a> (F25B)</h3>
<div class="ex">
<strong>Die Erfindung bezieht sich auf den Kältemittelkreislauf (1) einer Wärmepumpe.</strong><br/><hr/>
<strong>The invention relates to the refrigerant circuit (1) of a heat pump.</strong>
@@ -78,7 +78,7 @@ div.ex:hover {
<hr/>
-<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/3">EP-0002017-A1</a> (C25B)</h3>
+<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/3">EP-0002017-A1</a> (C25B)</h3>
<div class="ex">
<strong>Anoden für Elektrolysezwecke bestehen aus einem elektrisch leitenden Trägerkörper und einer darauf aufgebrachten Schicht aus metallischem Silicium und/oder Germanium.</strong><br/><hr/>
<strong>Anodes for electrochemical purposes are composed of an electrically conducting support body and a layer of metallic silicon and/or germanium applied thereto.</strong>
@@ -94,7 +94,7 @@ div.ex:hover {
<hr/>
- <h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/4">EP-0018427-A1</a> (G05B)</h3>
+ <h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/4">EP-0018427-A1</a> (G05B)</h3>
<div class="ex">
<strong>Elektrische Steuerschaltung mit einem Signalgenerator­ teil (20), der in Abhängigkeit von einem Führungssignal auf­ einanderfolgende gleichmäßige Impulse erzeugt.</strong><br/><hr/>
<strong>Electrical control circuit comprising a signal generator section (20) which generates successive uniform pulses in dependence on a control signal.</strong>
diff --git a/util/run_beta_test b/util/run_beta_test
index a0fe20f..9180b52 100755
--- a/util/run_beta_test
+++ b/util/run_beta_test
@@ -1,8 +1,8 @@
#!/bin/zsh -x
-cd /fast_scratch/simianer/lfpe/lfpe/util
+cd /srv/postedit/lfpe/util
./kill; ./kill; ./kill;
-for i in ../../sessions/product_de-en_beta_test_*; do
+for i in `ls -1 ../../sessions/ | grep -v "_1_" | grep -v data | grep -v toy`; do
echo $i
echo $(basename $i)
./run_server $(basename $i) &; sleep 600;
diff --git a/util/run_server b/util/run_server
index 7d45583..a4b7a6c 100755
--- a/util/run_server
+++ b/util/run_server
@@ -1,15 +1,17 @@
#!/bin/bash -x
-export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/lib/nanomsg-0.5-beta/lib:$LD_LIBRARY_PATH
-export PYTHONPATH=/fast_scratch/simianer/lfpe/lib/python:$PYTHONPATH
-export GEM_PATH=/fast_scratch/simianer/lfpe/lib/ruby/:$GEM_PATH
-UTIL=/fast_scratch/simianer/lfpe/lfpe/util
+BASE_DIR=/srv/postedit
+export LD_LIBRARY_PATH=$BASE_DIR/lib/nanomsg-0.5-beta/lib:$LD_LIBRARY_PATH
+export PYTHONPATH=$BASE_DIR/lib/python:$PYTHONPATH
+export GEM_PATH=$BASE_DIR/lib/ruby/:$GEM_PATH
+UTIL=$BASE_DIR/lfpe/util
SESSION=$1
-DIR=/fast_scratch/simianer/lfpe/sessions/$SESSION
+SESSION_DIR=$BASE_DIR/sessions/$SESSION
-rm $DIR/work/lockfile
-rm -r $DIR/work/
-mkdir -p $DIR/work
-cp $DIR/data.json.original $DIR/data.json
-$UTIL/../server.rb $DIR/conf.rb &>$DIR/work/session.out
+rm $SESSION_DIR/work/lockfile
+rm -r $SESSION_DIR/work/
+mkdir -p $SESSION_DIR/work
+cp $SESSION_DIR/data.json.original $SESSION_DIR/data.json
+cp $SESSION_DIR/g/original/* $SESSION_DIR/g/
+$UTIL/../server.rb $SESSION_DIR/conf.rb &>$SESSION_DIR/work/session.out
diff --git a/views/debug.haml b/views/debug.haml
index 4ebb0a0..3c1e006 100644
--- a/views/debug.haml
+++ b/views/debug.haml
@@ -43,10 +43,13 @@
%a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights
%li
%a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates
+ /
+ %li
+ %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor
%li
- %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor
+ %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars
%li
- %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules
+ %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules
/
%li
%a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown
@@ -141,6 +144,8 @@
%p.updated <strong>Number of updates:</strong> #{pairwise_ranking_data["num_up"]}
%p.updated <strong>Updated features:</strong> #{pairwise_ranking_data["updated_features"]}
%p <strong>Duration:</strong> #{data["durations"][progress]}ms
+ %p <strong>Keypresses:</strong> #{data["count_kbd"][progress]}
+ %p <strong>Clicks:</strong> #{data["count_click"][progress]}
%h3 Derivation
%p
@@ -299,9 +304,9 @@
%tr
%td Shape_*
%td.left Indicator features for rule shapes (39 in total)
- %tr
- %td IsSupportedOnline
- %td.left Rules with support from local context (added by Denkowski's online suffix array extractor)
+ /=%tr
+ /= %td IsSupportedOnline
+ /= %td.left Rules with support from local context (added by Denkowski's online suffix array extractor)
%p.up
%a{ :href => "#" } ^ up