From aeec004a2d99b595365e991d66d959adb010ae97 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 3 Jun 2016 13:34:18 +0200
Subject: NOGRAMMAR, summary, debug/admin, sessions, phrase2_extraction fix
---
views/summary.haml | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 53 insertions(+)
create mode 100644 views/summary.haml
diff --git a/views/summary.haml b/views/summary.haml
new file mode 100644
index 0000000..29dc0ad
--- /dev/null
+++ b/views/summary.haml
@@ -0,0 +1,53 @@
+-# Session summary view: one table row per post-edit with BLEU, TER and effort counts.
+-# Locals used: session_key, data (hash of per-segment arrays), ter_scores.
+-# BLEU is computed here in the view; TER scores are passed in precomputed.
+- require 'zipf'
+!!!
+%html
+  %head
+    %title Summary (Session #{session_key})
+    %link(rel="stylesheet" type="text/css" href="debug.css")
+    %script{:src => "http://postedit.cl.uni-heidelberg.de/js/jquery.min.js"}
+    %script{:src => "http://postedit.cl.uni-heidelberg.de/js/jquery.tablesorter.min.js"}
+    %script{:src => "http://postedit.cl.uni-heidelberg.de/js/debug.js"}
+  %body
+    %h1 Summary
+    %p (Session #{session_key})
+    %p Data is shown in the MT system's formatting. BLEU is calculated without smoothing.
+    %table
+      -# Header row.
+      %tr
+        %td
+          #{"#"}
+        %td
+          %strong Source
+        %td
+          %strong Post-Edit
+        %td
+          %strong Reference
+        %td
+          %strong BLEU
+        %td
+          %strong TER
+        %td
+          %strong Keystrokes
+        %td
+          %strong Mouse actions
+        %td
+          %strong Duration
+      -# One row per post-edit; j indexes the parallel per-segment arrays in data.
+      - data["post_edits"].each_with_index do |pe,j|
+        %tr
+          %td.center #{j+1}.
+          %td #{data["source_segments"][j]}
+          %td #{pe}
+          %td #{data["references"][j]}
+          %td.center #{(BLEU::per_sentence_bleu(pe, [data["references"][j]], 4, 0)*100).round 2}%
+          %td.center #{ter_scores[j]}
+          %td.center #{data["count_kbd"][j]}
+          %td.center #{data["count_click"][j]}
+          -# Durations are presumably milliseconds (TODO confirm); divide by a Float so
+          -# integer values are not truncated to whole seconds before rounding.
+          %td.center #{(data["durations"][j]/1000.0).round 1}s
+
+
--
cgit v1.2.3
From 0885987afd448fe1aedba7c6a2fdeff64c426623 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 3 Jun 2016 13:34:29 +0200
Subject: NOGRAMMAR, summary, debug/admin, sessions, phrase2_extraction fix
---
index.php | 32 ++++---
interface.php | 9 +-
js/interface.js | 7 +-
phrase2_extraction/phrase2_extraction.rb | 23 +++--
server.rb | 143 +++++++++++++++++++------------
views/debug.haml | 124 ++++++++++++++-------------
6 files changed, 200 insertions(+), 138 deletions(-)
diff --git a/index.php b/index.php
index d01caf1..66b4f43 100644
--- a/index.php
+++ b/index.php
@@ -22,41 +22,49 @@
-
-
Beta test:
+
+
Select:
diff --git a/interface.php b/interface.php
index 46b07f0..5ea96ae 100644
--- a/interface.php
+++ b/interface.php
@@ -33,7 +33,7 @@
Target: |
-
+
|
@@ -48,6 +48,10 @@ Note that the source word may be distorted.
+
+
Session finished, thank you! View
summary.
+
+
@@ -65,7 +69,8 @@ Note that the source word may be distorted.
Support:
Mail
Session: |
- Debug
+ Debug |
+ Summary
diff --git a/js/interface.js b/js/interface.js
index 9398650..4c9685d 100644
--- a/js/interface.js
+++ b/js/interface.js
@@ -398,10 +398,11 @@ var request_and_process_next = function ()
if (data["fin"]) {
target_textarea.setAttribute("disabled", "disabled");
status.style.display = "none";
- button.innerHTML = "Session finished, thank you!";
+ button.innerHTML = "---";
+ $("#view_summary").toggle()
$("#raw_source_textarea").html("");
$("#target_textarea").val("");
- $("#target_textarea").attr("rows", 1);
+ $("#target_textarea").attr("rows", 2);
button.setAttribute("disabled", "disabled");
pause_button.setAttribute("disabled", "disabled");
if (current_seg_id.value)
@@ -477,7 +478,7 @@ var request_and_process_next = function ()
$("#seg_"+(id-1)).removeClass("bold");
}
if (translation)
- target_textarea.rows = Math.round(translation.length/80+0.5);
+ target_textarea.rows = Math.round(translation.length/80+0.5)+2;
//raw_source_textarea.rows = Math.round(raw_source.length/80+0.5);
target_textarea.focus();
$("#original_mt").val(target_textarea.value);
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 547e0be..01bdae9 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -5,9 +5,9 @@ require 'zipf'
module PhrasePhraseExtraction
DEBUG = false
-MAX_NT = 2 # Chiang: 2
-MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases!
-MAX_SRC_SZ = 10 # Chiang: 5 words, -> words!
+MAX_NT = 1 # Chiang: 2
+MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words, -> phrases!
+MAX_SRC_SZ = 7 # Chiang: 5 words, -> words!
FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
class Rule
@@ -544,7 +544,7 @@ def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, el
}
rules.last.rebase_alignment fs, estart
fe += 1
- break if has_alignment(a, fe, "src")||fe>=elen
+ break if has_alignment(a, fe, "src")||fe>=flen
end
fs -= 1
break has_alignment(a, fs, "src")||fs<0
@@ -649,7 +649,20 @@ def PhrasePhraseExtraction.remove_adjacent_nt rules
prev = false
end
}
- b
+ c = false
+ prev = false
+ r.target.each { |i|
+ if i.is_a? String
+ if prev
+ c = true
+ break
+ end
+ prev = true
+ else
+ prev = false
+ end
+ }
+ b || c
}
end
diff --git a/server.rb b/server.rb
index 599fdbd..193baf6 100755
--- a/server.rb
+++ b/server.rb
@@ -239,49 +239,52 @@ def process_next reply
logmsg :server, "post-edit after processing: '#{e.join " "}'"
f = []
data["source_raw"].each { |i| f << URI.decode(i) }
- # 2.5 new rule extraction
- $status = "Extracting rules from post edit" # status
- #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
- grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
- current_grammar_ids = {}
- ReadFile.readlines_strip(grammar).each { |r|
- s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- current_grammar_ids[s] = true
- }
- new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true
- new_rules_ids = {}
- $new_rules.each { |r|
- s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- new_rules_ids[s] = true
- }
- new_rules = new_rules.map { |r| r.as_trule_string }
- _ = new_rules.dup
- logmsg :server, "# rules before filtering #{new_rules.size}"
- new_rules.reject! { |rs|
- s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s)
- }
- $new_rules += new_rules
- $new_rules.uniq! { |rs|
- splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- }
- f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules"
- f.write new_rules.join "\n"
- f.close
- logmsg :server, "# rules after filtering #{new_rules.size}"
- add_known_rules = _-new_rules
- add_known_rules.reject! { |rs|
- s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- new_rules_ids.has_key?(s)
- }
- f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules"
- f.write add_known_rules.join "\n"
- f.close
- $known_rules += add_known_rules
- $known_rules.uniq! { |rs|
- splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- }
- add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" }
+
+ if !NOGRAMMAR
+ # 2.5 new rule extraction
+ $status = "Extracting rules from post edit" # status
+ #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
+ current_grammar_ids = {}
+ ReadFile.readlines_strip(grammar).each { |r|
+ s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ current_grammar_ids[s] = true
+ }
+ new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true
+ new_rules_ids = {}
+ $new_rules.each { |r|
+ s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ new_rules_ids[s] = true
+ }
+ new_rules = new_rules.map { |r| r.as_trule_string }
+ _ = new_rules.dup
+ logmsg :server, "# rules before filtering #{new_rules.size}"
+ new_rules.reject! { |rs|
+ s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s)
+ }
+ $new_rules += new_rules
+ $new_rules.uniq! { |rs|
+ splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ }
+ f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules"
+ f.write new_rules.join "\n"
+ f.close
+ logmsg :server, "# rules after filtering #{new_rules.size}"
+ add_known_rules = _-new_rules
+ add_known_rules.reject! { |rs|
+ s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ new_rules_ids.has_key?(s)
+ }
+ f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules"
+ f.write add_known_rules.join "\n"
+ f.close
+ $known_rules += add_known_rules
+ $known_rules.uniq! { |rs|
+ splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ }
+ add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" }
+ end
else # text interface
post_edit = data["post_edit"]
end
@@ -328,19 +331,19 @@ def process_next reply
send_recv :dtrain, "#{annotated_source} ||| #{post_edit}"
# 5. update grammar extractor
if !$pregenerated_grammars
- # 5a. get forward alignment
- source_lc = source.downcase
- post_edit_lc = post_edit.downcase
- $status = "Aligning post-edit" # status
- a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}"
+ # 5a. get forward alignment
+ source_lc = source.downcase
+ post_edit_lc = post_edit.downcase
+ $status = "Aligning post-edit" # status
+ a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}"
# 5b. get backward alignment
- a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}"
- # 5c. symmetrize alignment
- a = send_recv :atools, "#{a_fwd} ||| #{a_back}"
+ a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}"
+ # 5c. symmetrize alignment
+ a = send_recv :atools, "#{a_fwd} ||| #{a_back}"
# 5d actual extractor
- $status = "Updating grammar extractor" # status
- msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}"
- send_recv :extractor, msg
+ $status = "Updating grammar extractor" # status
+ msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}"
+ send_recv :extractor, msg
end
# 6. update database
$db['updated'] << true
@@ -534,12 +537,18 @@ get '/debug' do # debug view
pairwise_ranking_data = JSON.parse ReadFile.read(fn).force_encoding("UTF-8")
end
+ admin = false
+ if params[:admin]
+ admin = true
+ end
+
haml :debug, :locals => { :data => data,
:pairwise_ranking_data => pairwise_ranking_data, \
:progress => $db["progress"]-1,
:new_rules => $new_rules, \
:known_rules => $known_rules, \
- :session_key => SESSION_KEY }
+ :session_key => SESSION_KEY, \
+ :admin => admin }
end
get '/new_rules' do # new/known rules
@@ -699,3 +708,27 @@ get '/shutdown' do # stop daemons and shut down server
return "stopped all daemons, ready to shutdown"
end
+get '/summary' do
+ logmsg :server, "showing summary"
+
+ data = JSON.parse ReadFile.read(DB_FILE).force_encoding("UTF-8")
+
+ ter_scores = []
+ data["post_edits"].each_with_index { |pe,j|
+ f = Tempfile.new "lfpe-summary-pe"
+ g = Tempfile.new "lfpe-summary-ref"
+ f.write pe+"\n"
+ g.write data["references"][j]+"\n"
+ f.close
+ g.close
+ ter_scores << (`#{CDEC}/mteval/fast_score -i #{f.path} -r #{g.path} -m ter 2>/dev/null`.to_f).round(2)
+ f.unlink
+ g.unlink
+ }
+
+ haml :summary, :locals => { :session_key => SESSION_KEY,
+ :data => data,
+ :ter_scores => ter_scores }
+
+end
+
diff --git a/views/debug.haml b/views/debug.haml
index 3c1e006..17f7f86 100644
--- a/views/debug.haml
+++ b/views/debug.haml
@@ -14,8 +14,9 @@
%p.red
%strong No data to show!
%ul
- %li
- %a{ :href => "#controls" } Controls
+ -if admin
+ %li
+ %a{ :href => "#controls" } Controls
%li
%a{ :href => "#post_edit" } Post-edit
%li
@@ -31,63 +32,64 @@
%p.tiny Session: #{session_key}
/=#########################################################################
- %h2#controls Controls
- %h3 Reset
- %p
- %strong [Server reply]
- %span#control_reply
- %ul
- %li
- %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress
- %li
- %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights
- %li
- %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates
- /
+ -if admin
+ %h2#controls Controls
+ %h3 Reset
+ %p
+ %strong [Server reply]
+ %span#control_reply
+ %ul
%li
- %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor
- %li
- %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars
- %li
- %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules
- /
+ %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress
%li
- %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown
- %h3 Learning rates & weights
- %p Get current weight/rate for specific features:
- %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" }
- %select#features_type_get
- %option{ :value => "/get_weight" } Weight
- %option{ :value => "/get_rate" } Rate
- %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" }
- %button#get_features
- Get
- %p Set weight/rate for specific features:
- %input#features{ :value => "Feature", :style => "text-align:center; width:20em" }
- %select#features_type
- %option{ :value => "/set_weights" } Weight
- %option{ :value => "/set_learning_rates" } Rate
- %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
- %button#set_features
- Set
- %p
- Set rate for feature groups:
- %span.tiny (Features unseen in training/tuning have a common default rate.)
- %select#feature_groups_get
- %option{ :value => "R" } rule ids
- %option{ :value => "RB" } rule bigrams
- %option{ :value => "Shape" } rule shapes
- %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" }
- %button#get_feature_groups
- Get rate
- %br
- %select#feature_groups
- %option{ :value => "R" } rule ids
- %option{ :value => "RB" } rule bigrams
- %option{ :value => "Shape" } rule shapes
- %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
- %button#set_feature_groups
- Set rate
+ %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights
+ %li
+ %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates
+ /
+ %li
+ %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor
+ %li
+ %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars
+ %li
+ %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules
+ /
+ %li
+ %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown
+ %h3 Learning rates & weights
+ %p Get current weight/rate for specific features:
+ %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" }
+ %select#features_type_get
+ %option{ :value => "/get_weight" } Weight
+ %option{ :value => "/get_rate" } Rate
+ %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" }
+ %button#get_features
+ Get
+ %p Set weight/rate for specific features:
+ %input#features{ :value => "Feature", :style => "text-align:center; width:20em" }
+ %select#features_type
+ %option{ :value => "/set_weights" } Weight
+ %option{ :value => "/set_learning_rates" } Rate
+ %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
+ %button#set_features
+ Set
+ %p
+ Set rate for feature groups:
+ %span.tiny (Features unseen in training/tuning have a common default rate.)
+ %select#feature_groups_get
+ %option{ :value => "R" } rule ids
+ %option{ :value => "RB" } rule bigrams
+ %option{ :value => "Shape" } rule shapes
+ %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" }
+ %button#get_feature_groups
+ Get rate
+ %br
+ %select#feature_groups
+ %option{ :value => "R" } rule ids
+ %option{ :value => "RB" } rule bigrams
+ %option{ :value => "Shape" } rule shapes
+ %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
+ %button#set_feature_groups
+ Set rate
/=#########################################################################
@@ -107,14 +109,14 @@
%td.noborder
%strong Progress:
%td.left.noborder ##{[0,progress].max}
- %tr
- %td.noborder
- %strong MT Input:
- %td.left.noborder #{data["source_segments"][[0,progress].max]}
%tr
%td.noborder
%strong Raw source:
%td.left.noborder #{data["raw_source_segments"][[0,progress].max]}
+ %tr
+ %td.noborder
+ %strong MT Input:
+ %td.left.noborder #{data["source_segments"][[0,progress].max]}
%tr
%td.noborder
%strong Post-edit:
--
cgit v1.2.3