diff options
author | Patrick Simianer <p@simianer.de> | 2016-05-10 10:59:49 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2016-05-10 10:59:49 +0200 |
commit | 8dd7a811559deafbe9262f11be2d592617b030a1 (patch) | |
tree | e30c76fbbea82f89dd10e56c35e70ec357bb24b4 | |
parent | 189698c044eb3362e65d213c35f425694eba9f27 (diff) | |
parent | 6bd7135e6039b0682f49234e42451077413f0bd9 (diff) |
Merge branch 'master' of github.com:pks/lfpe
-rw-r--r-- | .htaccess | 2 | ||||
-rw-r--r-- | README.md | 25 | ||||
-rw-r--r-- | external/lfpe-apache | 8 | ||||
-rw-r--r-- | inc/db.inc.php | 2 | ||||
-rw-r--r-- | interface.php | 5 | ||||
-rw-r--r-- | js/interface.js | 52 | ||||
-rwxr-xr-x | phrase2_extraction/phrase2_extraction.rb | 36 | ||||
-rwxr-xr-x | server.rb | 35 | ||||
-rw-r--r-- | static/pattr-abstracts.html | 10 | ||||
-rwxr-xr-x | util/run_beta_test | 4 | ||||
-rwxr-xr-x | util/run_server | 22 | ||||
-rw-r--r-- | views/debug.haml | 15 |
12 files changed, 167 insertions, 49 deletions
@@ -1,5 +1,5 @@ AuthName "Post-Editing Interface" AuthType Basic -AuthUserFile /fast_scratch/simianer/lfpe/.htpasswd +AuthUserFile /srv/postedit/.htpasswd require valid-user @@ -2,20 +2,35 @@ Post-editing interface for learning from post-edited machine translations. # Setup + +` + export BASE_DIR=/srv/postedit +` + ## nanomsg lib - export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/lib/nanomsg-0.5-beta/lib +` + export LD_LIBRARY_PATH=$BASE_DIR/lib/nanomsg-0.5-beta/lib +` ## ruby - [see $(pwd)/lib/ruby/gems/nanomsg-0.4.0/ext/extconf.rb] - gem install nanomsg -i $(pwd)/lib/ruby - export GEM_PATH=/fast_scratch/simianer/lfpe/lib/ruby/:$GEM_PATH +` + [see $BASE_DIR/lib/ruby/gems/nanomsg-0.4.0/ext/extconf.rb] + gem install nanomsg -i $BASE_DIR/lib/ruby + export GEM_PATH=$BASE_DIR/lib/ruby/:$GEM_PATH +` ## iptables +` iptables -A INPUT -i eth0 -p tcp -m multiport --dports 50000:50100 -j ACCEPT +` ## apache +` ln -s /etc/apache2/sites-available/lfpe /etc/apache2/sites-enabled/020-lfpe +` ## python - export PYTHONPATH=/fast_scratch/simianer/lfpe/lib/python:$PYTHONPATH +` + export PYTHONPATH=$BASE_DIR/lib/python:$PYTHONPATH +` diff --git a/external/lfpe-apache b/external/lfpe-apache index e4de4be..eec5e5c 100644 --- a/external/lfpe-apache +++ b/external/lfpe-apache @@ -3,13 +3,13 @@ ServerAdmin simianer@cl.uni-heidelberg.de - DocumentRoot /fast_scratch/simianer/lfpe/lfpe + DocumentRoot /srv/postedit/lfpe - ErrorLog /fast_scratch/simianer/lfpe/lfpe/logs/apache2.error.log + ErrorLog /srv/postedit/lfpe/logs/apache2.error.log LogLevel warn - CustomLog /fast_scratch/simianer/lfpe/lfpe/logs/apache2.access.log combined + CustomLog /srv/postedit/lfpe/logs/apache2.access.log combined - <Directory /fast_scratch/simianer/lfpe/lfpe> + <Directory /srv/postedit/lfpe> Options +FollowSymLinks AllowOverride All order allow,deny diff --git a/inc/db.inc.php b/inc/db.inc.php index f08eda5..ed45f7b 100644 --- a/inc/db.inc.php +++ b/inc/db.inc.php @@ -1,6 +1,6 @@ <?php 
-$SESSION_DIR="/fast_scratch/simianer/lfpe/sessions"; +$SESSION_DIR="/srv/postedit/sessions"; $json = file_get_contents($SESSION_DIR."/".$_GET["key"]."/data.json"); $db = json_decode($json); diff --git a/interface.php b/interface.php index 8df7fd0..46b07f0 100644 --- a/interface.php +++ b/interface.php @@ -33,7 +33,7 @@ <tr> <td align="right">Target:</td> <td> - <textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="catch_return(event);" disabled></textarea> + <textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="TEXT_handle_keypress(event);" disabled></textarea> </td> </tr> </table> @@ -52,7 +52,7 @@ Note that the source word may be distorted. <div> <button id="help_button" class="button" onclick="$('#help').toggle('blind')">Help</button> <button id="pause_button" class='button' type="button" onclick="pause()">Pause</button> - <button id="reset_button" class='button' type="button" onclick="DE_init()">Reset</button> + <button id="reset_button" class='button' type="button" onclick="reset()">Reset</button> <button id="next" type="button" class='button' onclick="next();">Start/Continue</button> <span id="status"><strong>Working: <span id="status_detail">...</span></strong> <img src="static/ajax-loader-large.gif" width="20px" /></span> </div> @@ -103,5 +103,6 @@ Note that the source word may be distorted. 
<textarea style="display:none" id="ui_type" ><?php echo $_GET["ui_type"]; ?></textarea> <textarea style="display:none" id="data" ></textarea> <textarea style="display:none" id="original_svg" ></textarea> +<textarea style="display:none" id="original_mt" ></textarea> <!-- /Data --> diff --git a/js/interface.js b/js/interface.js index c946e0c..f009641 100644 --- a/js/interface.js +++ b/js/interface.js @@ -5,6 +5,9 @@ var data, // global data object ui_type; // 't' (text) or 'g' (graphical) +var TEXT_count_click=0, + TEXT_count_kbd=0; + /* * cross-site request * @@ -108,6 +111,18 @@ var catch_return = function (e) return false; } +var TEXT_handle_keypress = function (e) +{ + if (e.keyCode == 13) { + e.preventDefault(); + next(); + } + + TEXT_count_kbd += 1; + + return false; +} + /* * working/not working * @@ -230,7 +245,7 @@ var next = function () working(); // get metadata stored in DOM - var base_url = "http://coltrane.cl.uni-heidelberg.de"; + var base_url = "http://lemmy.cl.uni-heidelberg.de"; var port = document.getElementById("port").value; var key = document.getElementById("key").value; @@ -259,6 +274,8 @@ var next = function () post_edit = $.trim(target_textarea.value); send_data["post_edit"] = encodeURIComponent(post_edit); send_data['type'] = 't'; + send_data["count_click"] = TEXT_count_click; + send_data["count_kbd"] = TEXT_count_kbd; } send_data["key"] = key; @@ -334,7 +351,7 @@ var request_and_process_next = function () var last_post_edit = document.getElementById("last_post_edit"); // get metadata stored in DOM - var base_url = "http://coltrane.cl.uni-heidelberg.de"; + var base_url = "http://lemmy.cl.uni-heidelberg.de"; var port = document.getElementById("port").value; var key = document.getElementById("key").value; @@ -456,8 +473,11 @@ var request_and_process_next = function () target_textarea.rows = Math.round(translation.length/80+0.5); //raw_source_textarea.rows = Math.round(raw_source.length/80+0.5); target_textarea.focus(); + 
$("#original_mt").val(target_textarea.value); target_textarea.selectionStart = 0; target_textarea.selectionEnd = 0; + TEXT_count_click = 0; + TEXT_count_kbd = 0; // remember aux data in DOM current_seg_id.value = id; @@ -493,10 +513,35 @@ var init_text_editor = function () { document.getElementById("target_textarea").value = ""; document.getElementById("target_textarea").setAttribute("disabled", "disabled"); + + TEXT_count_click = 0; + TEXT_count_kbd = 0; + + $("#target_textarea").click(function () { + TEXT_count_click += 1; + }); return false; } +var get_ui_type = function () +{ + return document.getElementById("ui_type").value; +} + +var reset = function () +{ + var ui_type = get_ui_type(); + if (ui_type == "t") { + if (!$("#init").val()) return; + TEXT_count_click = 0; + TEXT_count_kbd = 0; + $("#target_textarea").val($("#original_mt").val()); + } else if (ui_type == "g") { + DE_init() + } +} + /* * init site * @@ -514,7 +559,7 @@ $().ready(function() not_working(); - ui_type = document.getElementById("ui_type").value; + ui_type = get_ui_type(); // graphical derivation editor if (ui_type == "g") { @@ -525,6 +570,7 @@ $().ready(function() init_text_editor(); document.getElementById("textboxes").style.display = "block"; } + }); diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb index 48dfd73..547e0be 100755 --- a/phrase2_extraction/phrase2_extraction.rb +++ b/phrase2_extraction/phrase2_extraction.rb @@ -6,8 +6,8 @@ module PhrasePhraseExtraction DEBUG = false MAX_NT = 2 # Chiang: 2 -MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words -MAX_SRC_SZ = 3 # Chiang: 5 words +MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases! +MAX_SRC_SZ = 10 # Chiang: 5 words, -> words! FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true class Rule @@ -51,6 +51,21 @@ class Rule return src_len end + def len_src_w + src_len = 0 + @source.each { |i| + if i.is_a? 
String + src_len += i.split.size #1 + else + i.each { |j| + src_len += source_context[j].split.size + } + end + } + + return src_len + end + def len_tgt tgt_len = 0 @target.each { |i| @@ -64,6 +79,21 @@ class Rule return tgt_len end + def len_tgt_w + tgt_len = 0 + @target.each { |i| + if i.is_a? String + tgt_len += i.split.size + else + i.each { |j| + tgt_len += target_context[j].split.size + } + end + } + + return tgt_len + end + def to_s source_string = "" @source.each { |i| @@ -625,7 +655,7 @@ end def PhrasePhraseExtraction.remove_too_long_src_sides rules return rules.reject { |r| - r.len_src > PhrasePhraseExtraction::MAX_SRC_SZ + r.len_src_w > PhrasePhraseExtraction::MAX_SRC_SZ } end @@ -34,7 +34,7 @@ $oov_corrected.default = false # ############################################################################# # Daemons # ############################################################################# -DIR="/fast_scratch/simianer/lfpe" +DIR="/srv/postedit" $daemons = { :tokenizer => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}", :tokenizer_src => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}", @@ -123,6 +123,8 @@ def init $env[name] = { :socket => sock, :pid => pid } port += 1 } + + send_recv :truecaser, "lOaD iT" # lock file `touch #{LOCK_FILE}` $status = "Initialized" # status @@ -293,6 +295,8 @@ def process_next reply $db['svg'] << data['svg'] $db['original_svg'] << data['original_svg'] $db['durations'] << data['duration'].to_f + $db['count_click'] << data['count_click'].to_i + $db['count_kbd'] << data['count_kbd'].to_i $db['post_edits_display'] << send_recv(:detokenizer, post_edit) $last_processed_postedit = $db['post_edits_display'].last # 1. 
tokenize @@ -406,7 +410,7 @@ def process_next reply end # - known rules logmsg :server, "annotating known rules" - $status = "Adding rules to grammar" # status + $status = "Adding rules to the grammar" # status match = {} $known_rules.each { |r| _,src,tgt,_,_ = splitpipe r @@ -421,13 +425,18 @@ def process_next reply all_rules[j] = ar end } - WriteFile.new(grammar).write all_rules.join("\n")+"\n" # - additional rules - $new_rules.each { |rule| - logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'" - s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - `echo "#{rule}" >> #{grammar}` - } + #logmsg :server, $new_rules.to_s + if $new_rules.size > 0 + all_rules += $new_rules + #`echo "#{s}" >> #{grammar}` + end + WriteFile.new(grammar).write all_rules.join("\n")+"\n" + #$new_rules.each { |rule| + # logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'" + # s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + # `echo "#{rule}" >> #{grammar}` + #} # 2. 
check for OOVs if !$oov_corrected[$db['progress']] $status = "Checking for OOVs" # status @@ -664,11 +673,21 @@ get '/reset_extractor' do # reset grammar extractor return "reset extractor: done" end +get '/reset_grammars' do # reset grammar extractor + logmsg :server, "reset grammars" + return "locked" if $lock + `cp #{SESSION_DIR}/g/original/* #{SESSION_DIR}/g/` + $last_reply = nil + + return "reset grammars: done" +end + get '/reset_new_rules' do # removed learned rules $new_rules.clear $known_rules.clear `rm #{WORK_DIR}/*.*_rules` `rm #{WORK_DIR}/g/*` + $last_reply = nil return "reset new rules: done" end diff --git a/static/pattr-abstracts.html b/static/pattr-abstracts.html index c24598c..09f5779 100644 --- a/static/pattr-abstracts.html +++ b/static/pattr-abstracts.html @@ -20,7 +20,7 @@ div.ex:hover { <h2>Abstracts</h2> -<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/0">EP-0005734-A1</a> (H01H)</h3> +<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/0">EP-0005734-A1</a> (H01H)</h3> <div class="ex"> <strong>Ein elektromagnetisch betätigtes Schaltgerät, z. B. ein elektrisches Schütz, hat einen magnetischen Eisenkern, der aus zwei gleichen E-förmigen Magnetteilen besteht, nämlich dem die Wicklung tragenden Magnetkern (8) und dem Anker (7).</strong><br/><hr/> <strong>An electromagnetically operated switchgear, e.g. 
an electrical contactor, has a magnetic iron core which consists of two identical E- shaped magnet parts, specifically the magnet core (8), carrying the winding, and the armature (7).</strong> @@ -38,7 +38,7 @@ div.ex:hover { <hr /> -<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/1">EP-0003301-A1</a> (A01N,C07C)</h3> +<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/1">EP-0003301-A1</a> (A01N,C07C)</h3> <div class="ex"> <strong>Sie weisen starke insektizide, akarizide und nematizide Eigenschaften auf.</strong><br/><hr/> <strong>They have strong insecticidal, acaricidal and nematicidal properties.</strong> @@ -50,7 +50,7 @@ div.ex:hover { <hr/> -<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/2">EP-0003578-A2</a> (F25B)</h3> +<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/2">EP-0003578-A2</a> (F25B)</h3> <div class="ex"> <strong>Die Erfindung bezieht sich auf den Kältemittelkreislauf (1) einer Wärmepumpe.</strong><br/><hr/> <strong>The invention relates to the refrigerant circuit (1) of a heat pump.</strong> @@ -78,7 +78,7 @@ div.ex:hover { <hr/> -<h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/3">EP-0002017-A1</a> (C25B)</h3> +<h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/3">EP-0002017-A1</a> (C25B)</h3> <div class="ex"> <strong>Anoden für Elektrolysezwecke bestehen aus einem elektrisch leitenden Trägerkörper und einer darauf aufgebrachten Schicht aus metallischem Silicium und/oder Germanium.</strong><br/><hr/> <strong>Anodes for electrochemical purposes are composed of an electrically conducting support body and a layer of metallic silicon and/or germanium applied thereto.</strong> @@ -94,7 +94,7 @@ div.ex:hover { <hr/> - <h3><a href="http://coltrane.cl.uni-heidelberg.de:60666/load/4">EP-0018427-A1</a> (G05B)</h3> + <h3><a href="http://lemmy.cl.uni-heidelberg.de:60666/load/4">EP-0018427-A1</a> (G05B)</h3> <div class="ex"> <strong>Elektrische Steuerschaltung mit einem Signalgenerator teil (20), 
der in Abhängigkeit von einem Führungssignal auf einanderfolgende gleichmäßige Impulse erzeugt.</strong><br/><hr/> <strong>Electrical control circuit comprising a signal generator section (20) which generates successive uniform pulses in dependence on a control signal.</strong> diff --git a/util/run_beta_test b/util/run_beta_test index a0fe20f..9180b52 100755 --- a/util/run_beta_test +++ b/util/run_beta_test @@ -1,8 +1,8 @@ #!/bin/zsh -x -cd /fast_scratch/simianer/lfpe/lfpe/util +cd /srv/postedit/lfpe/util ./kill; ./kill; ./kill; -for i in ../../sessions/product_de-en_beta_test_*; do +for i in `ls -1 ../../sessions/ | grep -v "_1_" | grep -v data | grep -v toy`; do echo $i echo $(basename $i) ./run_server $(basename $i) &; sleep 600; diff --git a/util/run_server b/util/run_server index 7d45583..a4b7a6c 100755 --- a/util/run_server +++ b/util/run_server @@ -1,15 +1,17 @@ #!/bin/bash -x -export LD_LIBRARY_PATH=/fast_scratch/simianer/lfpe/lib/nanomsg-0.5-beta/lib:$LD_LIBRARY_PATH -export PYTHONPATH=/fast_scratch/simianer/lfpe/lib/python:$PYTHONPATH -export GEM_PATH=/fast_scratch/simianer/lfpe/lib/ruby/:$GEM_PATH -UTIL=/fast_scratch/simianer/lfpe/lfpe/util +BASE_DIR=/srv/postedit +export LD_LIBRARY_PATH=$BASE_DIR/lib/nanomsg-0.5-beta/lib:$LD_LIBRARY_PATH +export PYTHONPATH=$BASE_DIR/lib/python:$PYTHONPATH +export GEM_PATH=$BASE_DIR/lib/ruby/:$GEM_PATH +UTIL=$BASE_DIR/lfpe/util SESSION=$1 -DIR=/fast_scratch/simianer/lfpe/sessions/$SESSION +SESSION_DIR=$BASE_DIR/sessions/$SESSION -rm $DIR/work/lockfile -rm -r $DIR/work/ -mkdir -p $DIR/work -cp $DIR/data.json.original $DIR/data.json -$UTIL/../server.rb $DIR/conf.rb &>$DIR/work/session.out +rm $SESSION_DIR/work/lockfile +rm -r $SESSION_DIR/work/ +mkdir -p $SESSION_DIR/work +cp $SESSION_DIR/data.json.original $SESSION_DIR/data.json +cp $SESSION_DIR/g/original/* $SESSION_DIR/g/ +$UTIL/../server.rb $SESSION_DIR/conf.rb &>$SESSION_DIR/work/session.out diff --git a/views/debug.haml b/views/debug.haml index 4ebb0a0..3c1e006 
100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -43,10 +43,13 @@ %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights %li %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates + / + %li + %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor %li - %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor + %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars %li - %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules + %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules / %li %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown @@ -141,6 +144,8 @@ %p.updated <strong>Number of updates:</strong> #{pairwise_ranking_data["num_up"]} %p.updated <strong>Updated features:</strong> #{pairwise_ranking_data["updated_features"]} %p <strong>Duration:</strong> #{data["durations"][progress]}ms + %p <strong>Keypresses:</strong> #{data["count_kbd"][progress]} + %p <strong>Clicks:</strong> #{data["count_click"][progress]} %h3 Derivation %p @@ -299,9 +304,9 @@ %tr %td Shape_* %td.left Indicator features for rule shapes (39 in total) - %tr - %td IsSupportedOnline - %td.left Rules with support from local context (added by Denkowski's online suffix array extractor) + /=%tr + /= %td IsSupportedOnline + /= %td.left Rules with support from local context (added by Denkowski's online suffix array extractor) %p.up %a{ :href => "#" } ^ up |