From aeec004a2d99b595365e991d66d959adb010ae97 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 3 Jun 2016 13:34:18 +0200 Subject: NOGRAMMAR, summary, debug/admin, sessions, phrase2_extraction fix --- views/summary.haml | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 views/summary.haml diff --git a/views/summary.haml b/views/summary.haml new file mode 100644 index 0000000..29dc0ad --- /dev/null +++ b/views/summary.haml @@ -0,0 +1,53 @@ +- require 'zipf' +!!! +%html + %head + %title Summary (Session #{session_key}) + %link(rel="stylesheet" type="text/css" href="debug.css") + %script{:src => "http://postedit.cl.uni-heidelberg.de/js/jquery.min.js"} + %script{:src => "http://postedit.cl.uni-heidelberg.de/js/jquery.tablesorter.min.js"} + %script{:src => "http://postedit.cl.uni-heidelberg.de/js/debug.js"} + %body + %h1 Summary + %p (Session #{session_key}) + %p Data is shown in the MT system's formatting. BLEU is calculated without smoothing. + %table + %tr + %td + #{"#"} + %td + %strong Source + %td + %strong + Post-Edit + %td + %strong + Reference + %td + %strong + BLEU + %td + %strong + TER + %td + %strong + Keystrokes + %td + %strong + Mouse actions + %td + %strong + Duration + - data["post_edits"].each_with_index do |pe,j| + %tr + %td.center #{j+1}. + %td #{data["source_segments"][j]} + %td #{pe} + %td #{data["references"][j]} + %td.center #{(BLEU::per_sentence_bleu(pe, [data["references"][j]], 4, 0)*100).round 2}% + %td.center #{ter_scores[j]} + %td.center #{data["count_kbd"][j]} + %td.center #{data["count_click"][j]} + %td.center #{(data["durations"][j]/1000).round 1}s + + -- cgit v1.2.3 From 0885987afd448fe1aedba7c6a2fdeff64c426623 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 3 Jun 2016 13:34:29 +0200 Subject: NOGRAMMAR, summary, debug/admin, sessions, phrase2_extraction fix --- index.php | 32 ++++--- interface.php | 9 +- js/interface.js | 7 +- phrase2_extraction/phrase2_extraction.rb | 23 +++-- server.rb | 143 +++++++++++++++++++------------ views/debug.haml | 124 ++++++++++++++------------- 6 files changed, 200 insertions(+), 138 deletions(-) diff --git a/index.php b/index.php index d01caf1..66b4f43 100644 --- a/index.php +++ b/index.php @@ -22,41 +22,49 @@ -
-

Beta test: +

+

Select:

diff --git a/interface.php b/interface.php index 46b07f0..5ea96ae 100644 --- a/interface.php +++ b/interface.php @@ -33,7 +33,7 @@ Target: - + @@ -48,6 +48,10 @@ Note that the source word may be distorted.
+ + + +
@@ -65,7 +69,8 @@ Note that the source word may be distorted. Support: Mail

Session: | - Debug + Debug | + Summary

diff --git a/js/interface.js b/js/interface.js index 9398650..4c9685d 100644 --- a/js/interface.js +++ b/js/interface.js @@ -398,10 +398,11 @@ var request_and_process_next = function () if (data["fin"]) { target_textarea.setAttribute("disabled", "disabled"); status.style.display = "none"; - button.innerHTML = "Session finished, thank you!"; + button.innerHTML = "---"; + $("#view_summary").toggle() $("#raw_source_textarea").html(""); $("#target_textarea").val(""); - $("#target_textarea").attr("rows", 1); + $("#target_textarea").attr("rows", 2); button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); if (current_seg_id.value) @@ -477,7 +478,7 @@ var request_and_process_next = function () $("#seg_"+(id-1)).removeClass("bold"); } if (translation) - target_textarea.rows = Math.round(translation.length/80+0.5); + target_textarea.rows = Math.round(translation.length/80+0.5)+2; //raw_source_textarea.rows = Math.round(raw_source.length/80+0.5); target_textarea.focus(); $("#original_mt").val(target_textarea.value); diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb index 547e0be..01bdae9 100755 --- a/phrase2_extraction/phrase2_extraction.rb +++ b/phrase2_extraction/phrase2_extraction.rb @@ -5,9 +5,9 @@ require 'zipf' module PhrasePhraseExtraction DEBUG = false -MAX_NT = 2 # Chiang: 2 -MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases! -MAX_SRC_SZ = 10 # Chiang: 5 words, -> words! +MAX_NT = 1 # Chiang: 2 +MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words, -> phrases! +MAX_SRC_SZ = 7 # Chiang: 5 words, -> words! FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true class Rule @@ -544,7 +544,7 @@ def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, el } rules.last.rebase_alignment fs, estart fe += 1 - break if has_alignment(a, fe, "src")||fe>=elen + break if has_alignment(a, fe, "src")||fe>=flen end fs -= 1 break has_alignment(a, fs, "src")||fs<0 @@ -649,7 +649,20 @@ def PhrasePhraseExtraction.remove_adjacent_nt rules prev = false end } - b + c = false + prev = false + r.target.each { |i| + if i.is_a? String + if prev + c = true + break + end + prev = true + else + prev = false + end + } + b || c } end diff --git a/server.rb b/server.rb index 599fdbd..193baf6 100755 --- a/server.rb +++ b/server.rb @@ -239,49 +239,52 @@ def process_next reply logmsg :server, "post-edit after processing: '#{e.join " "}'" f = [] data["source_raw"].each { |i| f << URI.decode(i) } - # 2.5 new rule extraction - $status = "Extracting rules from post edit" # status - #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" - grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}" - current_grammar_ids = {} - ReadFile.readlines_strip(grammar).each { |r| - s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - current_grammar_ids[s] = true - } - new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true - new_rules_ids = {} - $new_rules.each { |r| - s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - new_rules_ids[s] = true - } - new_rules = new_rules.map { |r| r.as_trule_string } - _ = new_rules.dup - logmsg :server, "# rules before filtering #{new_rules.size}" - new_rules.reject! { |rs| - s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s) - } - $new_rules += new_rules - $new_rules.uniq! { |rs| - splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - } - f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules" - f.write new_rules.join "\n" - f.close - logmsg :server, "# rules after filtering #{new_rules.size}" - add_known_rules = _-new_rules - add_known_rules.reject! { |rs| - s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - new_rules_ids.has_key?(s) - } - f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules" - f.write add_known_rules.join "\n" - f.close - $known_rules += add_known_rules - $known_rules.uniq! { |rs| - splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - } - add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" } + + if !NOGRAMMAR + # 2.5 new rule extraction + $status = "Extracting rules from post edit" # status + #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" + grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}" + current_grammar_ids = {} + ReadFile.readlines_strip(grammar).each { |r| + s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + current_grammar_ids[s] = true + } + new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true + new_rules_ids = {} + $new_rules.each { |r| + s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + new_rules_ids[s] = true + } + new_rules = new_rules.map { |r| r.as_trule_string } + _ = new_rules.dup + logmsg :server, "# rules before filtering #{new_rules.size}" + new_rules.reject! { |rs| + s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s) + } + $new_rules += new_rules + $new_rules.uniq! { |rs| + splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + } + f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules" + f.write new_rules.join "\n" + f.close + logmsg :server, "# rules after filtering #{new_rules.size}" + add_known_rules = _-new_rules + add_known_rules.reject! { |rs| + s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + new_rules_ids.has_key?(s) + } + f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules" + f.write add_known_rules.join "\n" + f.close + $known_rules += add_known_rules + $known_rules.uniq! { |rs| + splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + } + add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" } + end else # text interface post_edit = data["post_edit"] end @@ -328,19 +331,19 @@ def process_next reply send_recv :dtrain, "#{annotated_source} ||| #{post_edit}" # 5. update grammar extractor if !$pregenerated_grammars - # 5a. get forward alignment - source_lc = source.downcase - post_edit_lc = post_edit.downcase - $status = "Aligning post-edit" # status - a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}" + # 5a. get forward alignment + source_lc = source.downcase + post_edit_lc = post_edit.downcase + $status = "Aligning post-edit" # status + a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}" # 5b. get backward alignment - a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}" - # 5c. symmetrize alignment - a = send_recv :atools, "#{a_fwd} ||| #{a_back}" + a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}" + # 5c. symmetrize alignment + a = send_recv :atools, "#{a_fwd} ||| #{a_back}" # 5d actual extractor - $status = "Updating grammar extractor" # status - msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}" - send_recv :extractor, msg + $status = "Updating grammar extractor" # status + msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}" + send_recv :extractor, msg end # 6. update database $db['updated'] << true @@ -534,12 +537,18 @@ get '/debug' do # debug view pairwise_ranking_data = JSON.parse ReadFile.read(fn).force_encoding("UTF-8") end + admin = false + if params[:admin] + admin = true + end + haml :debug, :locals => { :data => data, :pairwise_ranking_data => pairwise_ranking_data, \ :progress => $db["progress"]-1, :new_rules => $new_rules, \ :known_rules => $known_rules, \ - :session_key => SESSION_KEY } + :session_key => SESSION_KEY, \ + :admin => admin } end get '/new_rules' do # new/known rules @@ -699,3 +708,27 @@ get '/shutdown' do # stop daemons and shut down server return "stopped all daemons, ready to shutdown" end +get '/summary' do + logmsg :server, "showing summary" + + data = JSON.parse ReadFile.read(DB_FILE).force_encoding("UTF-8") + + ter_scores = [] + data["post_edits"].each_with_index { |pe,j| + f = Tempfile.new "lfpe-summary-pe" + g = Tempfile.new "lfpe-summary-ref" + f.write pe+"\n" + g.write data["references"][j]+"\n" + f.close + g.close + ter_scores << (`#{CDEC}/mteval/fast_score -i #{f.path} -r #{g.path} -m ter 2>/dev/null`.to_f).round(2) + f.unlink + g.unlink + } + + haml :summary, :locals => { :session_key => SESSION_KEY, + :data => data, + :ter_scores => ter_scores } + +end + diff --git a/views/debug.haml b/views/debug.haml index 3c1e006..17f7f86 100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -14,8 +14,9 @@ %p.red %strong No data to show! %ul - %li - %a{ :href => "#controls" } Controls + -if admin + %li + %a{ :href => "#controls" } Controls %li %a{ :href => "#post_edit" } Post-edit %li @@ -31,63 +32,64 @@ %p.tiny Session: #{session_key} /=######################################################################### - %h2#controls Controls - %h3 Reset - %p - %strong [Server reply] - %span#control_reply - %ul - %li - %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress - %li - %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights - %li - %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates - / + -if admin + %h2#controls Controls + %h3 Reset + %p + %strong [Server reply] + %span#control_reply + %ul %li - %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor - %li - %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars - %li - %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules - / + %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress %li - %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown - %h3 Learning rates & weights - %p Get current weight/rate for specific features: - %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" } - %select#features_type_get - %option{ :value => "/get_weight" } Weight - %option{ :value => "/get_rate" } Rate - %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" } - %button#get_features - Get - %p Set weight/rate for specific features: - %input#features{ :value => "Feature", :style => "text-align:center; width:20em" } - %select#features_type - %option{ :value => "/set_weights" } Weight - %option{ :value => "/set_learning_rates" } Rate - %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" } - %button#set_features - Set - %p - Set rate for feature groups: - %span.tiny (Features unseen in training/tuning have a common default rate.) - %select#feature_groups_get - %option{ :value => "R" } rule ids - %option{ :value => "RB" } rule bigrams - %option{ :value => "Shape" } rule shapes - %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" } - %button#get_feature_groups - Get rate - %br - %select#feature_groups - %option{ :value => "R" } rule ids - %option{ :value => "RB" } rule bigrams - %option{ :value => "Shape" } rule shapes - %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" } - %button#set_feature_groups - Set rate + %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights + %li + %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates + / + %li + %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor + %li + %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars + %li + %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules + / + %li + %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown + %h3 Learning rates & weights + %p Get current weight/rate for specific features: + %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" } + %select#features_type_get + %option{ :value => "/get_weight" } Weight + %option{ :value => "/get_rate" } Rate + %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" } + %button#get_features + Get + %p Set weight/rate for specific features: + %input#features{ :value => "Feature", :style => "text-align:center; width:20em" } + %select#features_type + %option{ :value => "/set_weights" } Weight + %option{ :value => "/set_learning_rates" } Rate + %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" } + %button#set_features + Set + %p + Set rate for feature groups: + %span.tiny (Features unseen in training/tuning have a common default rate.) + %select#feature_groups_get + %option{ :value => "R" } rule ids + %option{ :value => "RB" } rule bigrams + %option{ :value => "Shape" } rule shapes + %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" } + %button#get_feature_groups + Get rate + %br + %select#feature_groups + %option{ :value => "R" } rule ids + %option{ :value => "RB" } rule bigrams + %option{ :value => "Shape" } rule shapes + %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" } + %button#set_feature_groups + Set rate /=######################################################################### @@ -107,14 +109,14 @@ %td.noborder %strong Progress: %td.left.noborder ##{[0,progress].max} - %tr - %td.noborder - %strong MT Input: - %td.left.noborder #{data["source_segments"][[0,progress].max]} %tr %td.noborder %strong Raw source: %td.left.noborder #{data["raw_source_segments"][[0,progress].max]} + %tr + %td.noborder + %strong MT Input: + %td.left.noborder #{data["source_segments"][[0,progress].max]} %tr %td.noborder %strong Post-edit: -- cgit v1.2.3