From 7829b5c7a6f82121b013d819aad3e8239cd2596f Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 3 May 2016 14:22:32 +0200
Subject: fixes
---
server.rb | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
(limited to 'server.rb')
diff --git a/server.rb b/server.rb
index 5a95131..9ca43b1 100755
--- a/server.rb
+++ b/server.rb
@@ -34,7 +34,7 @@ $oov_corrected.default = false
# #############################################################################
# Daemons
# #############################################################################
-DIR="/fast_scratch/simianer/lfpe"
+DIR="/srv/postedit"
$daemons = {
:tokenizer => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{TARGET_LANG}",
:tokenizer_src => "#{DIR}/lfpe/util/nanomsg_wrapper.rb -a tokenize -S '__ADDR__' -e #{EXTERNAL} -l #{SOURCE_LANG}",
@@ -123,6 +123,8 @@ def init
$env[name] = { :socket => sock, :pid => pid }
port += 1
}
+
+ send_recv :truecaser, "lOaD iT"
# lock file
`touch #{LOCK_FILE}`
$status = "Initialized" # status
@@ -406,7 +408,7 @@ def process_next reply
end
# - known rules
logmsg :server, "annotating known rules"
- $status = "Adding rules to grammar" # status
+ $status = "Adding rules to the grammar" # status
match = {}
$known_rules.each { |r|
_,src,tgt,_,_ = splitpipe r
@@ -423,11 +425,16 @@ def process_next reply
}
WriteFile.new(grammar).write all_rules.join("\n")+"\n"
# - additional rules
- $new_rules.each { |rule|
- logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
- s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- `echo "#{rule}" >> #{grammar}`
- }
+ logmsg :server, $new_rules.to_s
+ if $new_rules.size > 0
+ s = $new_rules.join "\n"
+ `echo "#{s}" >> #{grammar}`
+ end
+ #$new_rules.each { |rule|
+ # logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
+ # s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ # `echo "#{rule}" >> #{grammar}`
+ #}
# 2. check for OOVs
if !$oov_corrected[$db['progress']]
$status = "Checking for OOVs" # status
@@ -664,11 +671,21 @@ get '/reset_extractor' do # reset grammar extractor
return "reset extractor: done"
end
+get '/reset_grammars' do # restore the grammar files in SESSION_DIR/g from SESSION_DIR/g/original
+ logmsg :server, "reset grammars"
+ return "locked" if $lock # refuse to touch the grammar files while $lock is set
+ `cp #{SESSION_DIR}/g/original/* #{SESSION_DIR}/g/` # overwrite files in g/ with the copies kept in g/original/
+ $last_reply = nil # NOTE(review): presumably discards a reply computed with the old grammars -- confirm
+
+ return "reset grammars: done"
+end
+
get '/reset_new_rules' do # removed learned rules
$new_rules.clear
$known_rules.clear
`rm #{WORK_DIR}/*.*_rules`
`rm #{WORK_DIR}/g/*`
+ $last_reply = nil # NOTE(review): presumably so a reply computed with the removed rules is not reused -- confirm
return "reset new rules: done"
end
--
cgit v1.2.3
From 6bd7135e6039b0682f49234e42451077413f0bd9 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 10 May 2016 10:49:09 +0200
Subject: count clicks and keystrokes, fix bug in rule addition, good params
and improvement for phrase2 extraction
---
interface.php | 5 ++--
js/interface.js | 47 +++++++++++++++++++++++++++++++-
phrase2_extraction/phrase2_extraction.rb | 38 +++++++++++++++++++++++---
server.rb | 10 ++++---
views/debug.haml | 2 ++
5 files changed, 91 insertions(+), 11 deletions(-)
(limited to 'server.rb')
diff --git a/interface.php b/interface.php
index 8df7fd0..46b07f0 100644
--- a/interface.php
+++ b/interface.php
@@ -33,7 +33,7 @@
Target: |
-
+
|
@@ -52,7 +52,7 @@ Note that the source word may be distorted.
-
+
Working: ...
@@ -103,5 +103,6 @@ Note that the source word may be distorted.
+
diff --git a/js/interface.js b/js/interface.js
index d346b96..f009641 100644
--- a/js/interface.js
+++ b/js/interface.js
@@ -5,6 +5,9 @@
var data, // global data object
ui_type; // 't' (text) or 'g' (graphical)
+var TEXT_count_click=0,
+ TEXT_count_kbd=0;
+
/*
* cross-site request
*
@@ -108,6 +111,18 @@ var catch_return = function (e)
return false;
}
+var TEXT_handle_keypress = function (e) // key event handler: hands over to next() on Return and counts every keypress
+{
+ if (e.keyCode == 13) { // 13 = Return/Enter
+ e.preventDefault(); // suppress the default action of the Return key
+ next(); // next() collects the post-edit and the click/keypress counters into send_data
+ }
+
+ TEXT_count_kbd += 1; // runs for every event, including a Return (incremented only after next() was called)
+
+ return false; // NOTE(review): always false; if the binding honors the return value this cancels the default action of every key -- confirm how the handler is attached
+}
+
/*
* working/not working
*
@@ -259,6 +274,8 @@ var next = function ()
post_edit = $.trim(target_textarea.value);
send_data["post_edit"] = encodeURIComponent(post_edit);
send_data['type'] = 't';
+ send_data["count_click"] = TEXT_count_click;
+ send_data["count_kbd"] = TEXT_count_kbd;
}
send_data["key"] = key;
@@ -456,8 +473,11 @@ var request_and_process_next = function ()
target_textarea.rows = Math.round(translation.length/80+0.5);
//raw_source_textarea.rows = Math.round(raw_source.length/80+0.5);
target_textarea.focus();
+ $("#original_mt").val(target_textarea.value);
target_textarea.selectionStart = 0;
target_textarea.selectionEnd = 0;
+ TEXT_count_click = 0;
+ TEXT_count_kbd = 0;
// remember aux data in DOM
current_seg_id.value = id;
@@ -493,10 +513,35 @@ var init_text_editor = function ()
{
document.getElementById("target_textarea").value = "";
document.getElementById("target_textarea").setAttribute("disabled", "disabled");
+
+ TEXT_count_click = 0;
+ TEXT_count_kbd = 0;
+
+ $("#target_textarea").click(function () {
+ TEXT_count_click += 1;
+ });
return false;
}
+var get_ui_type = function () // returns the current value of the element with id "ui_type"
+{
+ return document.getElementById("ui_type").value; // 't' (text) or 'g' (graphical), per the comment on the global ui_type
+}
+
+var reset = function () // reset the editor state for the active UI type
+{
+ var ui_type = get_ui_type(); // local variable; shadows the global ui_type inside this function
+ if (ui_type == "t") { // text UI: zero the counters and restore the textarea
+ if (!$("#init").val()) return; // do nothing while #init has no truthy value
+ TEXT_count_click = 0; // start counting clicks and keypresses afresh
+ TEXT_count_kbd = 0;
+ $("#target_textarea").val($("#original_mt").val()); // put back the text stored in #original_mt
+ } else if (ui_type == "g") { // graphical UI
+ DE_init() // defined outside this diff; NOTE(review): presumably re-initializes the derivation editor -- confirm
+ }
+}
+
/*
* init site
*
@@ -514,7 +559,7 @@ $().ready(function()
not_working();
- ui_type = document.getElementById("ui_type").value;
+ ui_type = get_ui_type();
// graphical derivation editor
if (ui_type == "g") {
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 253df1b..547e0be 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -5,9 +5,9 @@ require 'zipf'
module PhrasePhraseExtraction
DEBUG = false
-MAX_NT = 1 # Chiang: 2
-MAX_SEED_NUM_WORDS = 10 # Chiang: 10 words
-MAX_SRC_SZ = 5 # Chiang: 5 words
+MAX_NT = 2 # Chiang: 2
+MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases!
+MAX_SRC_SZ = 10 # Chiang: 5 words, -> words!
FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
class Rule
@@ -51,6 +51,21 @@ class Rule
return src_len
end
+ def len_src_w # source-side length in whitespace-separated words; returns an Integer
+ src_len = 0
+ @source.each { |i|
+ if i.is_a? String
+ src_len += i.split.size # word count of this string item; the original "#1" note presumably refers to len_src adding 1 here -- confirm
+ else
+ i.each { |j| # non-String items are iterated; each j is used as an index into source_context
+ src_len += source_context[j].split.size # word count of the referenced source_context entry
+ }
+ end
+ }
+
+ return src_len
+ end
+
def len_tgt
tgt_len = 0
@target.each { |i|
@@ -64,6 +79,21 @@ class Rule
return tgt_len
end
+ def len_tgt_w # target-side length in whitespace-separated words; returns an Integer
+ tgt_len = 0
+ @target.each { |i|
+ if i.is_a? String
+ tgt_len += i.split.size # word count of this string item
+ else
+ i.each { |j| # non-String items are iterated; each j is used as an index into target_context
+ tgt_len += target_context[j].split.size # word count of the referenced target_context entry
+ }
+ end
+ }
+
+ return tgt_len
+ end
+
def to_s
source_string = ""
@source.each { |i|
@@ -625,7 +655,7 @@ end
def PhrasePhraseExtraction.remove_too_long_src_sides rules
return rules.reject { |r|
- r.len_src > PhrasePhraseExtraction::MAX_SRC_SZ
+ r.len_src_w > PhrasePhraseExtraction::MAX_SRC_SZ # compare the word-level source length, matching the "-> words!" note on MAX_SRC_SZ
}
end
diff --git a/server.rb b/server.rb
index 9ca43b1..599fdbd 100755
--- a/server.rb
+++ b/server.rb
@@ -295,6 +295,8 @@ def process_next reply
$db['svg'] << data['svg']
$db['original_svg'] << data['original_svg']
$db['durations'] << data['duration'].to_f
+ $db['count_click'] << data['count_click'].to_i
+ $db['count_kbd'] << data['count_kbd'].to_i
$db['post_edits_display'] << send_recv(:detokenizer, post_edit)
$last_processed_postedit = $db['post_edits_display'].last
# 1. tokenize
@@ -423,13 +425,13 @@ def process_next reply
all_rules[j] = ar
end
}
- WriteFile.new(grammar).write all_rules.join("\n")+"\n"
# - additional rules
- logmsg :server, $new_rules.to_s
+ #logmsg :server, $new_rules.to_s
if $new_rules.size > 0
- s = $new_rules.join "\n"
- `echo "#{s}" >> #{grammar}`
+ all_rules += $new_rules
+ #`echo "#{s}" >> #{grammar}`
end
+ WriteFile.new(grammar).write all_rules.join("\n")+"\n"
#$new_rules.each { |rule|
# logmsg :server, "adding rule '#{rule}' to grammar '#{grammar}'"
# s = splitpipe(rule)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
diff --git a/views/debug.haml b/views/debug.haml
index 1bc0fd1..3c1e006 100644
--- a/views/debug.haml
+++ b/views/debug.haml
@@ -144,6 +144,8 @@
%p.updated Number of updates: #{pairwise_ranking_data["num_up"]}
%p.updated Updated features: #{pairwise_ranking_data["updated_features"]}
%p Duration: #{data["durations"][progress]}ms
+ %p Keypresses: #{data["count_kbd"][progress]}
+ %p Clicks: #{data["count_click"][progress]}
%h3 Derivation
%p
--
cgit v1.2.3