summary refs log tree commit diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-12-17 15:41:45 +0100
committer Patrick Simianer <p@simianer.de> 2015-12-17 15:41:45 +0100
commit c03f111ae6eac7b5b6ac2ad4635603eededcb6a3 (patch)
tree 465a6d68ea31e116e05fc11a2e52d2f85af8e57e
parent 19541176d3f93e0092ca91b1413cafeceda6784e (diff)
fix oov bug, better/fixed known rule handling
-rw-r--r-- index.php 3
-rw-r--r-- js/lfpe.js 4
-rwxr-xr-x server.rb 25
-rw-r--r-- views/debug.haml 7
4 files changed, 26 insertions, 13 deletions
diff --git a/index.php b/index.php
index be71c70..7ef8562 100644
--- a/index.php
+++ b/index.php
@@ -34,8 +34,7 @@
<option value="product_de-en_beta_test_C_sparse" onclick="document.getElementById('key').value=this.value;">C [sparse]</option>
<option value="product_de-en_beta_test_D" onclick="document.getElementById('key').value=this.value;">D</option>
<option value="product_de-en_beta_test_D_sparse" onclick="document.getElementById('key').value=this.value;">D [sparse]</option>
- <option value="toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option>
- <option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example [new]</option>
+ <option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option>
</select>
</p>
</div>
diff --git a/js/lfpe.js b/js/lfpe.js
index 9185018..d8f45d0 100644
--- a/js/lfpe.js
+++ b/js/lfpe.js
@@ -205,8 +205,8 @@ function Next()
var src = [];
var tgt = [];
for (var i=0; i<l; i++) {
- src.push(trim(document.getElementById("oov_src"+i).value));
- tgt.push(trim(document.getElementById("oov_tgt"+i).value));
+ src.push(encodeURIComponent(trim(document.getElementById("oov_src"+i).value)));
+ tgt.push(encodeURIComponent(trim(document.getElementById("oov_tgt"+i).value)));
if (tgt[tgt.length-1] == "") { // empty correction
alert("Please provide translations for all words.");
//not_working();
diff --git a/server.rb b/server.rb
index 8f20162..f833611 100755
--- a/server.rb
+++ b/server.rb
@@ -162,13 +162,13 @@ post '/next' do # (receive post-edit, update models), send next translation
grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
src, tgt = splitpipe(data["correct"]) # format:src1\tsrc2\tsrc..|||tgt1\t..
tgt = clean_str tgt
- src = src.split("\t").map { |i| i.strip }
- tgt = tgt.split("\t").map { |i| i.strip }
+ src = src.split("\t").map { |i| URI.decode(i).strip }
+ tgt = tgt.split("\t").map { |i| URI.decode(i).strip }
src.each_with_index { |s,i|
next if s==''||tgt[i]==''
as = ""
tgt[i].split.each_index { |k| as += " 0-#{k}" }
- r = "[X] ||| #{s} ||| #{tgt[i]} ||| NewRule=1 OOVFix=1 ||| #{as}"
+ r = "[X] ||| #{s} ||| #{tgt[i]} ||| NewRule=1 ||| #{as}"
$additional_rules << r
}
$confirmed = true
@@ -215,21 +215,31 @@ post '/next' do # (receive post-edit, update models), send next translation
s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
sts[s] = true
}
-
+ ats = {}
+ $additional_rules.each { |r|
+ s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ ats[s] = true
+ }
f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules"
new_rules = new_rules.map { |r| r.as_trule_string }
logmsg :server, "# rules before filtering #{new_rules.size}"
_ = new_rules.dup
new_rules.reject! { |rs|
s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- sts.has_key? s
+ sts.has_key?(s)
}
logmsg :server, "# rules after filtering #{new_rules.size}"
(_-new_rules).each { |r|
logmsg :server, "rejected rule [already known]: '#{r}'"
}
- $additional_rules += new_rules
$rejected_rules += _-new_rules
+ logmsg :server, "removing known new rules, before: #{new_rules.size}"
+ new_rules.reject! { |rs|
+ s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ ats.has_key?(s)
+ }
+ logmsg :server, "after: #{new_rules.size}"
+ $additional_rules += new_rules
f.write new_rules.join "\n"
f.close
else # text interface
@@ -278,11 +288,12 @@ post '/next' do # (receive post-edit, update models), send next translation
# 5d actual extractor
send_recv :extractor, "default_context ||| #{source} ||| #{post_edit} ||| #{a}"
# 6. update database
- logmsg :db, "updating database"
$db['updated'] << true
+ `cp #{WORK_DIR}/dtrain.debug.json #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json`
else
$db['updated'] << false
end
+ logmsg :db, "updating database"
update_database
end
source = $db['source_segments'][$db['progress']]
diff --git a/views/debug.haml b/views/debug.haml
index 2a0773d..e7b6a7b 100644
--- a/views/debug.haml
+++ b/views/debug.haml
@@ -42,7 +42,7 @@
%li
%a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown
%h3 Learning rate
- %p Default for dense features: <strong>0.001</strong>, for sparse features: <strong>1.0e-05</strong>
+ %p Default for dense features: <strong>1.0e-05</strong>, for sparse features: <strong>1.0e-05</strong>
%select
- [1000,100,10,1,0.1,0.01,0.001,0.0001,0.00001,0.000001,0.0000001,0.00000001,0.000000001,0.0000000001].each do |i|
%option.ajax{:value => i, :tgt => "/set_learning_rate/#{i}"} #{i}
@@ -120,7 +120,7 @@
%h2#rules New &amp; known rules
%pre #{additional_rules.join("\n")}
%h3
- Rejected [known] rules
+ Known rules
%pre #{rejected_rules.join("\n")}
%p
%a{ :href => "#" } ^ up
@@ -148,6 +148,9 @@
- if [9,89].include? j
%hr
%h3 Weight updates
+ %p Raw update: #{pairwise_ranking_data["update_raw"]}
+ /=%p Update (learning rates applied): <pre>#{pairwise_ranking_data["update"]}</pre>
+ /=- "#{pairwise_ranking_data["update"]}"
%table
%tr
%th Feature