summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2016-06-03 13:34:29 +0200
committer Patrick Simianer <p@simianer.de> 2016-06-03 13:34:29 +0200
commit 0885987afd448fe1aedba7c6a2fdeff64c426623 (patch)
tree 2d6a8b957d3514769ee80d7a66cf0231133160ae
parent aeec004a2d99b595365e991d66d959adb010ae97 (diff)
NOGRAMMAR, summary, debug/admin, sessions, phrase2_extraction fix
-rw-r--r-- index.php 32
-rw-r--r-- interface.php 9
-rw-r--r-- js/interface.js 7
-rwxr-xr-x phrase2_extraction/phrase2_extraction.rb 23
-rwxr-xr-x server.rb 143
-rw-r--r-- views/debug.haml 124
6 files changed, 200 insertions, 138 deletions
diff --git a/index.php b/index.php
index d01caf1..66b4f43 100644
--- a/index.php
+++ b/index.php
@@ -22,41 +22,49 @@
<input type="submit" value="Submit" />
</form>
-<div class="small" style="background:#eee;margin: 5em 0 5em 0;padding:.5em; max-width:25%">
-<p>Beta test:
+<div class="small" style="background:#eee;margin: 5em 0 5em 0;padding:.5em; max-width:10%">
+<p>Select:
<select class="small">
+ <option value="final_model_en-de_g0" onclick="document.getElementById('key').value=this.value;">0</option>
+ <option value="final_model_en-de_g1" onclick="document.getElementById('key').value=this.value;">1</option>
+ <option value="final_model_en-de_g2" onclick="document.getElementById('key').value=this.value;">2</option>
+ <option value="final_model_en-de_g3" onclick="document.getElementById('key').value=this.value;">3</option>
+ <option value="final_model_en-de_g4" onclick="document.getElementById('key').value=this.value;">4</option>
+ <option value="final_model_en-de_g5" onclick="document.getElementById('key').value=this.value;">5</option>
+ <option value="final_model_en-de_g6" onclick="document.getElementById('key').value=this.value;">6</option>
+
<!--<optgroup label="________________">
<option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option>
</optgroup>-->
-<optgroup label="Session A">
+<!--<optgroup label="Session A">
<option value="product_de-en_beta_test_A" onclick="document.getElementById('key').value=this.value;">de-en</option>
<option value="product_en-de_beta_test_A" onclick="document.getElementById('key').value=this.value;">en-de</option>
-<!-- <option value="product_de-en_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* de-en</option>
- <option value="product_en-de_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* en-de</option>-->
+ <option value="product_de-en_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* de-en</option>
+ <option value="product_en-de_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* en-de</option>
</optgroup>
<optgroup label="Session B">
<option value="product_de-en_beta_test_B" onclick="document.getElementById('key').value=this.value;">de-en</option>
<option value="product_en-de_beta_test_B" onclick="document.getElementById('key').value=this.value;">en-de</option>
-<!-- <option value="product_de-en_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* de-en</option>
- <option value="product_en-de_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* en-de</option>-->
+ <option value="product_de-en_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* de-en</option>
+ <option value="product_en-de_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* en-de</option>
</optgroup>
<optgroup label="Session C">
<option value="product_de-en_beta_test_C" onclick="document.getElementById('key').value=this.value;">de-en</option>
<option value="product_en-de_beta_test_C" onclick="document.getElementById('key').value=this.value;">en-de</option>
-<!-- <option value="product_en-de_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* en-de</option>
- <option value="product_de-en_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* de-en</option>-->
+ <option value="product_en-de_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* en-de</option>
+ <option value="product_de-en_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* de-en</option>
</optgroup>
<optgroup label="Session D">
<option value="product_de-en_beta_test_D" onclick="document.getElementById('key').value=this.value;">de-en</option>
<option value="product_en-de_beta_test_D" onclick="document.getElementById('key').value=this.value;">en-de</option>
-<!-- <option value="product_de-en_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* de-en</option>
- <option value="product_en-de_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* en-de</option>-->
-</optgroup>
+ <option value="product_de-en_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* de-en</option>
+ <option value="product_en-de_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* en-de</option>
+</optgroup>-->
</select>
</p>
diff --git a/interface.php b/interface.php
index 46b07f0..5ea96ae 100644
--- a/interface.php
+++ b/interface.php
@@ -33,7 +33,7 @@
<tr>
<td align="right">Target:</td>
<td>
- <textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="TEXT_handle_keypress(event);" disabled></textarea>
+ <textarea id="target_textarea" name="target" cols="80" rows="2" onkeypress="TEXT_handle_keypress(event);" disabled></textarea>
</td>
</tr>
</table>
@@ -48,6 +48,10 @@ Note that the source word may be distorted.
</div>
<!-- /Source and target textboxes -->
+<!-- Summary -->
+<div id="view_summary" style="display:none;margin:2em"><strong>Session finished, thank you!</strong> View <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/summary" target="_blank">summary</a>.</div>
+<!-- /Summary -->
+
<!-- Buttons -->
<div>
<button id="help_button" class="button" onclick="$('#help').toggle('blind')">Help</button>
@@ -65,7 +69,8 @@ Note that the source word may be distorted.
Support: <a href="mailto://simianer@cl.uni-heidelberg.de">Mail</a>
</p>
<p class="tiny">Session: <?php echo $_GET["key"]; ?> |
- <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a>
+ <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a> |
+ <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/summary" target="_blank">Summary</a>
</p>
</div>
<!-- /Help -->
diff --git a/js/interface.js b/js/interface.js
index 9398650..4c9685d 100644
--- a/js/interface.js
+++ b/js/interface.js
@@ -398,10 +398,11 @@ var request_and_process_next = function ()
if (data["fin"]) {
target_textarea.setAttribute("disabled", "disabled");
status.style.display = "none";
- button.innerHTML = "Session finished, thank you!";
+ button.innerHTML = "---";
+ $("#view_summary").toggle()
$("#raw_source_textarea").html("");
$("#target_textarea").val("");
- $("#target_textarea").attr("rows", 1);
+ $("#target_textarea").attr("rows", 2);
button.setAttribute("disabled", "disabled");
pause_button.setAttribute("disabled", "disabled");
if (current_seg_id.value)
@@ -477,7 +478,7 @@ var request_and_process_next = function ()
$("#seg_"+(id-1)).removeClass("bold");
}
if (translation)
- target_textarea.rows = Math.round(translation.length/80+0.5);
+ target_textarea.rows = Math.round(translation.length/80+0.5)+2;
//raw_source_textarea.rows = Math.round(raw_source.length/80+0.5);
target_textarea.focus();
$("#original_mt").val(target_textarea.value);
diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb
index 547e0be..01bdae9 100755
--- a/phrase2_extraction/phrase2_extraction.rb
+++ b/phrase2_extraction/phrase2_extraction.rb
@@ -5,9 +5,9 @@ require 'zipf'
module PhrasePhraseExtraction
DEBUG = false
-MAX_NT = 2 # Chiang: 2
-MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases!
-MAX_SRC_SZ = 10 # Chiang: 5 words, -> words!
+MAX_NT = 1 # Chiang: 2
+MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words, -> phrases!
+MAX_SRC_SZ = 7 # Chiang: 5 words, -> words!
FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true
class Rule
@@ -544,7 +544,7 @@ def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, el
}
rules.last.rebase_alignment fs, estart
fe += 1
- break if has_alignment(a, fe, "src")||fe>=elen
+ break if has_alignment(a, fe, "src")||fe>=flen
end
fs -= 1
break has_alignment(a, fs, "src")||fs<0
@@ -649,7 +649,20 @@ def PhrasePhraseExtraction.remove_adjacent_nt rules
prev = false
end
}
- b
+ c = false
+ prev = false
+ r.target.each { |i|
+ if i.is_a? String
+ if prev
+ c = true
+ break
+ end
+ prev = true
+ else
+ prev = false
+ end
+ }
+ b || c
}
end
diff --git a/server.rb b/server.rb
index 599fdbd..193baf6 100755
--- a/server.rb
+++ b/server.rb
@@ -239,49 +239,52 @@ def process_next reply
logmsg :server, "post-edit after processing: '#{e.join " "}'"
f = []
data["source_raw"].each { |i| f << URI.decode(i) }
- # 2.5 new rule extraction
- $status = "Extracting rules from post edit" # status
- #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
- grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
- current_grammar_ids = {}
- ReadFile.readlines_strip(grammar).each { |r|
- s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- current_grammar_ids[s] = true
- }
- new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true
- new_rules_ids = {}
- $new_rules.each { |r|
- s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- new_rules_ids[s] = true
- }
- new_rules = new_rules.map { |r| r.as_trule_string }
- _ = new_rules.dup
- logmsg :server, "# rules before filtering #{new_rules.size}"
- new_rules.reject! { |rs|
- s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s)
- }
- $new_rules += new_rules
- $new_rules.uniq! { |rs|
- splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- }
- f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules"
- f.write new_rules.join "\n"
- f.close
- logmsg :server, "# rules after filtering #{new_rules.size}"
- add_known_rules = _-new_rules
- add_known_rules.reject! { |rs|
- s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- new_rules_ids.has_key?(s)
- }
- f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules"
- f.write add_known_rules.join "\n"
- f.close
- $known_rules += add_known_rules
- $known_rules.uniq! { |rs|
- splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
- }
- add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" }
+
+ if !NOGRAMMAR
+ # 2.5 new rule extraction
+ $status = "Extracting rules from post edit" # status
+ #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar"
+ grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}"
+ current_grammar_ids = {}
+ ReadFile.readlines_strip(grammar).each { |r|
+ s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ current_grammar_ids[s] = true
+ }
+ new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true
+ new_rules_ids = {}
+ $new_rules.each { |r|
+ s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ new_rules_ids[s] = true
+ }
+ new_rules = new_rules.map { |r| r.as_trule_string }
+ _ = new_rules.dup
+ logmsg :server, "# rules before filtering #{new_rules.size}"
+ new_rules.reject! { |rs|
+ s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s)
+ }
+ $new_rules += new_rules
+ $new_rules.uniq! { |rs|
+ splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ }
+ f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules"
+ f.write new_rules.join "\n"
+ f.close
+ logmsg :server, "# rules after filtering #{new_rules.size}"
+ add_known_rules = _-new_rules
+ add_known_rules.reject! { |rs|
+ s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ new_rules_ids.has_key?(s)
+ }
+ f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules"
+ f.write add_known_rules.join "\n"
+ f.close
+ $known_rules += add_known_rules
+ $known_rules.uniq! { |rs|
+ splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ")
+ }
+ add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" }
+ end
else # text interface
post_edit = data["post_edit"]
end
@@ -328,19 +331,19 @@ def process_next reply
send_recv :dtrain, "#{annotated_source} ||| #{post_edit}"
# 5. update grammar extractor
if !$pregenerated_grammars
- # 5a. get forward alignment
- source_lc = source.downcase
- post_edit_lc = post_edit.downcase
- $status = "Aligning post-edit" # status
- a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}"
+ # 5a. get forward alignment
+ source_lc = source.downcase
+ post_edit_lc = post_edit.downcase
+ $status = "Aligning post-edit" # status
+ a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}"
# 5b. get backward alignment
- a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}"
- # 5c. symmetrize alignment
- a = send_recv :atools, "#{a_fwd} ||| #{a_back}"
+ a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}"
+ # 5c. symmetrize alignment
+ a = send_recv :atools, "#{a_fwd} ||| #{a_back}"
# 5d actual extractor
- $status = "Updating grammar extractor" # status
- msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}"
- send_recv :extractor, msg
+ $status = "Updating grammar extractor" # status
+ msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}"
+ send_recv :extractor, msg
end
# 6. update database
$db['updated'] << true
@@ -534,12 +537,18 @@ get '/debug' do # debug view
pairwise_ranking_data = JSON.parse ReadFile.read(fn).force_encoding("UTF-8")
end
+ admin = false
+ if params[:admin]
+ admin = true
+ end
+
haml :debug, :locals => { :data => data,
:pairwise_ranking_data => pairwise_ranking_data, \
:progress => $db["progress"]-1,
:new_rules => $new_rules, \
:known_rules => $known_rules, \
- :session_key => SESSION_KEY }
+ :session_key => SESSION_KEY, \
+ :admin => admin }
end
get '/new_rules' do # new/known rules
@@ -699,3 +708,27 @@ get '/shutdown' do # stop daemons and shut down server
return "stopped all daemons, ready to shutdown"
end
+get '/summary' do
+ logmsg :server, "showing summary"
+
+ data = JSON.parse ReadFile.read(DB_FILE).force_encoding("UTF-8")
+
+ ter_scores = []
+ data["post_edits"].each_with_index { |pe,j|
+ f = Tempfile.new "lfpe-summary-pe"
+ g = Tempfile.new "lfpe-summary-ref"
+ f.write pe+"\n"
+ g.write data["references"][j]+"\n"
+ f.close
+ g.close
+ ter_scores << (`#{CDEC}/mteval/fast_score -i #{f.path} -r #{g.path} -m ter 2>/dev/null`.to_f).round(2)
+ f.unlink
+ g.unlink
+ }
+
+ haml :summary, :locals => { :session_key => SESSION_KEY,
+ :data => data,
+ :ter_scores => ter_scores }
+
+end
+
diff --git a/views/debug.haml b/views/debug.haml
index 3c1e006..17f7f86 100644
--- a/views/debug.haml
+++ b/views/debug.haml
@@ -14,8 +14,9 @@
%p.red
%strong No data to show!
%ul
- %li
- %a{ :href => "#controls" } Controls
+ -if admin
+ %li
+ %a{ :href => "#controls" } Controls
%li
%a{ :href => "#post_edit" } Post-edit
%li
@@ -31,63 +32,64 @@
%p.tiny Session: #{session_key}
/=#########################################################################
- %h2#controls Controls
- %h3 Reset
- %p
- %strong [Server reply]
- %span#control_reply
- %ul
- %li
- %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress
- %li
- %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights
- %li
- %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates
- /
+ -if admin
+ %h2#controls Controls
+ %h3 Reset
+ %p
+ %strong [Server reply]
+ %span#control_reply
+ %ul
%li
- %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor
- %li
- %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars
- %li
- %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules
- /
+ %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress
%li
- %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown
- %h3 Learning rates &amp; weights
- %p Get current weight/rate for specific features:
- %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" }
- %select#features_type_get
- %option{ :value => "/get_weight" } Weight
- %option{ :value => "/get_rate" } Rate
- %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" }
- %button#get_features
- Get
- %p Set weight/rate for specific features:
- %input#features{ :value => "Feature", :style => "text-align:center; width:20em" }
- %select#features_type
- %option{ :value => "/set_weights" } Weight
- %option{ :value => "/set_learning_rates" } Rate
- %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
- %button#set_features
- Set
- %p
- Set rate for feature groups:
- %span.tiny (Features unseen in training/tuning have a common default rate.)
- %select#feature_groups_get
- %option{ :value => "R" } rule ids
- %option{ :value => "RB" } rule bigrams
- %option{ :value => "Shape" } rule shapes
- %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" }
- %button#get_feature_groups
- Get rate
- %br
- %select#feature_groups
- %option{ :value => "R" } rule ids
- %option{ :value => "RB" } rule bigrams
- %option{ :value => "Shape" } rule shapes
- %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
- %button#set_feature_groups
- Set rate
+ %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights
+ %li
+ %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates
+ /
+ %li
+ %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor
+ %li
+ %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars
+ %li
+ %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules
+ /
+ %li
+ %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown
+ %h3 Learning rates &amp; weights
+ %p Get current weight/rate for specific features:
+ %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" }
+ %select#features_type_get
+ %option{ :value => "/get_weight" } Weight
+ %option{ :value => "/get_rate" } Rate
+ %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" }
+ %button#get_features
+ Get
+ %p Set weight/rate for specific features:
+ %input#features{ :value => "Feature", :style => "text-align:center; width:20em" }
+ %select#features_type
+ %option{ :value => "/set_weights" } Weight
+ %option{ :value => "/set_learning_rates" } Rate
+ %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
+ %button#set_features
+ Set
+ %p
+ Set rate for feature groups:
+ %span.tiny (Features unseen in training/tuning have a common default rate.)
+ %select#feature_groups_get
+ %option{ :value => "R" } rule ids
+ %option{ :value => "RB" } rule bigrams
+ %option{ :value => "Shape" } rule shapes
+ %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" }
+ %button#get_feature_groups
+ Get rate
+ %br
+ %select#feature_groups
+ %option{ :value => "R" } rule ids
+ %option{ :value => "RB" } rule bigrams
+ %option{ :value => "Shape" } rule shapes
+ %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" }
+ %button#set_feature_groups
+ Set rate
/=#########################################################################
@@ -109,14 +111,14 @@
%td.left.noborder ##{[0,progress].max}
%tr
%td.noborder
- %strong MT Input:
- %td.left.noborder #{data["source_segments"][[0,progress].max]}
- %tr
- %td.noborder
%strong Raw source:
%td.left.noborder #{data["raw_source_segments"][[0,progress].max]}
%tr
%td.noborder
+ %strong MT Input:
+ %td.left.noborder #{data["source_segments"][[0,progress].max]}
+ %tr
+ %td.noborder
%strong Post-edit:
%td.left.noborder #{data["post_edits_raw"][progress]}
%tr