diff options
author | Patrick Simianer <p@simianer.de> | 2015-12-17 15:41:45 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-12-17 15:41:45 +0100 |
commit | c03f111ae6eac7b5b6ac2ad4635603eededcb6a3 (patch) | |
tree | 465a6d68ea31e116e05fc11a2e52d2f85af8e57e | |
parent | 19541176d3f93e0092ca91b1413cafeceda6784e (diff) |
fix oov bug, better/fixed known rule handling
-rw-r--r-- | index.php | 3 | ||||
-rw-r--r-- | js/lfpe.js | 4 | ||||
-rwxr-xr-x | server.rb | 25 | ||||
-rw-r--r-- | views/debug.haml | 7 |
4 files changed, 26 insertions, 13 deletions
@@ -34,8 +34,7 @@ <option value="product_de-en_beta_test_C_sparse" onclick="document.getElementById('key').value=this.value;">C [sparse]</option> <option value="product_de-en_beta_test_D" onclick="document.getElementById('key').value=this.value;">D</option> <option value="product_de-en_beta_test_D_sparse" onclick="document.getElementById('key').value=this.value;">D [sparse]</option> - <option value="toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option> - <option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example [new]</option> + <option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option> </select> </p> </div> @@ -205,8 +205,8 @@ function Next() var src = []; var tgt = []; for (var i=0; i<l; i++) { - src.push(trim(document.getElementById("oov_src"+i).value)); - tgt.push(trim(document.getElementById("oov_tgt"+i).value)); + src.push(encodeURIComponent(trim(document.getElementById("oov_src"+i).value))); + tgt.push(encodeURIComponent(trim(document.getElementById("oov_tgt"+i).value))); if (tgt[tgt.length-1] == "") { // empty correction alert("Please provide translations for all words."); //not_working(); @@ -162,13 +162,13 @@ post '/next' do # (receive post-edit, update models), send next translation grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" src, tgt = splitpipe(data["correct"]) # format:src1\tsrc2\tsrc..|||tgt1\t.. tgt = clean_str tgt - src = src.split("\t").map { |i| i.strip } - tgt = tgt.split("\t").map { |i| i.strip } + src = src.split("\t").map { |i| URI.decode(i).strip } + tgt = tgt.split("\t").map { |i| URI.decode(i).strip } src.each_with_index { |s,i| next if s==''||tgt[i]=='' as = "" tgt[i].split.each_index { |k| as += " 0-#{k}" } - r = "[X] ||| #{s} ||| #{tgt[i]} ||| NewRule=1 OOVFix=1 ||| #{as}" + r = "[X] ||| #{s} ||| #{tgt[i]} ||| NewRule=1 ||| #{as}" $additional_rules << r } $confirmed = true @@ -215,21 +215,31 @@ post '/next' do # (receive post-edit, update models), send next translation s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") sts[s] = true } - + ats = {} + $additional_rules.each { |r| + s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + ats[s] = true + } f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules" new_rules = new_rules.map { |r| r.as_trule_string } logmsg :server, "# rules before filtering #{new_rules.size}" _ = new_rules.dup new_rules.reject! { |rs| s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - sts.has_key? s + sts.has_key?(s) } logmsg :server, "# rules after filtering #{new_rules.size}" (_-new_rules).each { |r| logmsg :server, "rejected rule [already known]: '#{r}'" } - $additional_rules += new_rules $rejected_rules += _-new_rules + logmsg :server, "removing known new rules, before: #{new_rules.size}" + new_rules.reject! { |rs| + s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + ats.has_key?(s) + } + logmsg :server, "after: #{new_rules.size}" + $additional_rules += new_rules f.write new_rules.join "\n" f.close else # text interface @@ -278,11 +288,12 @@ post '/next' do # (receive post-edit, update models), send next translation # 5d actual extractor send_recv :extractor, "default_context ||| #{source} ||| #{post_edit} ||| #{a}" # 6. update database - logmsg :db, "updating database" $db['updated'] << true + `cp #{WORK_DIR}/dtrain.debug.json #{WORK_DIR}/#{$db['progress']}.dtrain.debug.json` else $db['updated'] << false end + logmsg :db, "updating database" update_database end source = $db['source_segments'][$db['progress']] diff --git a/views/debug.haml b/views/debug.haml index 2a0773d..e7b6a7b 100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -42,7 +42,7 @@ %li %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown %h3 Learning rate - %p Default for dense features: <strong>0.001</strong>, for sparse features: <strong>1.0e-05</strong> + %p Default for dense features: <strong>1.0e-05</strong>, for sparse features: <strong>1.0e-05</strong> %select - [1000,100,10,1,0.1,0.01,0.001,0.0001,0.00001,0.000001,0.0000001,0.00000001,0.000000001,0.0000000001].each do |i| %option.ajax{:value => i, :tgt => "/set_learning_rate/#{i}"} #{i} @@ -120,7 +120,7 @@ %h2#rules New & known rules %pre #{additional_rules.join("\n")} %h3 - Rejected [known] rules + Known rules %pre #{rejected_rules.join("\n")} %p %a{ :href => "#" } ^ up @@ -148,6 +148,9 @@ - if [9,89].include? j %hr %h3 Weight updates + %p Raw update: #{pairwise_ranking_data["update_raw"]} + /=%p Update (learning rates applied): <pre>#{pairwise_ranking_data["update"]}</pre> + /=- "#{pairwise_ranking_data["update"]}" %table %tr %th Feature |