diff options
-rw-r--r-- | index.php | 32 | ||||
-rw-r--r-- | interface.php | 9 | ||||
-rw-r--r-- | js/interface.js | 7 | ||||
-rwxr-xr-x | phrase2_extraction/phrase2_extraction.rb | 23 | ||||
-rwxr-xr-x | server.rb | 143 | ||||
-rw-r--r-- | views/debug.haml | 124 | ||||
-rw-r--r-- | views/summary.haml | 53 |
7 files changed, 253 insertions, 138 deletions
@@ -22,41 +22,49 @@ <input type="submit" value="Submit" /> </form> -<div class="small" style="background:#eee;margin: 5em 0 5em 0;padding:.5em; max-width:25%"> -<p>Beta test: +<div class="small" style="background:#eee;margin: 5em 0 5em 0;padding:.5em; max-width:10%"> +<p>Select: <select class="small"> + <option value="final_model_en-de_g0" onclick="document.getElementById('key').value=this.value;">0</option> + <option value="final_model_en-de_g1" onclick="document.getElementById('key').value=this.value;">1</option> + <option value="final_model_en-de_g2" onclick="document.getElementById('key').value=this.value;">2</option> + <option value="final_model_en-de_g3" onclick="document.getElementById('key').value=this.value;">3</option> + <option value="final_model_en-de_g4" onclick="document.getElementById('key').value=this.value;">4</option> + <option value="final_model_en-de_g5" onclick="document.getElementById('key').value=this.value;">5</option> + <option value="final_model_en-de_g6" onclick="document.getElementById('key').value=this.value;">6</option> + <!--<optgroup label="________________"> <option value="product_de-en_toy_example" onclick="document.getElementById('key').value=this.value;">toy example</option> </optgroup>--> -<optgroup label="Session A"> +<!--<optgroup label="Session A"> <option value="product_de-en_beta_test_A" onclick="document.getElementById('key').value=this.value;">de-en</option> <option value="product_en-de_beta_test_A" onclick="document.getElementById('key').value=this.value;">en-de</option> -<!-- <option value="product_de-en_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* de-en</option> - <option value="product_en-de_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* en-de</option>--> + <option value="product_de-en_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* de-en</option> + <option value="product_en-de_beta_test_1_A" onclick="document.getElementById('key').value=this.value;">A* en-de</option> </optgroup> <optgroup label="Session B"> <option value="product_de-en_beta_test_B" onclick="document.getElementById('key').value=this.value;">de-en</option> <option value="product_en-de_beta_test_B" onclick="document.getElementById('key').value=this.value;">en-de</option> -<!-- <option value="product_de-en_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* de-en</option> - <option value="product_en-de_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* en-de</option>--> + <option value="product_de-en_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* de-en</option> + <option value="product_en-de_beta_test_1_B" onclick="document.getElementById('key').value=this.value;">B* en-de</option> </optgroup> <optgroup label="Session C"> <option value="product_de-en_beta_test_C" onclick="document.getElementById('key').value=this.value;">de-en</option> <option value="product_en-de_beta_test_C" onclick="document.getElementById('key').value=this.value;">en-de</option> -<!-- <option value="product_en-de_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* en-de</option> - <option value="product_de-en_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* de-en</option>--> + <option value="product_en-de_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* en-de</option> + <option value="product_de-en_beta_test_1_C" onclick="document.getElementById('key').value=this.value;">C* de-en</option> </optgroup> <optgroup label="Session D"> <option value="product_de-en_beta_test_D" onclick="document.getElementById('key').value=this.value;">de-en</option> <option value="product_en-de_beta_test_D" onclick="document.getElementById('key').value=this.value;">en-de</option> -<!-- <option value="product_de-en_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* de-en</option> - <option value="product_en-de_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* en-de</option>--> -</optgroup> + <option value="product_de-en_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* de-en</option> + <option value="product_en-de_beta_test_1_D" onclick="document.getElementById('key').value=this.value;">D* en-de</option> +</optgroup>--> </select> </p> diff --git a/interface.php b/interface.php index 46b07f0..5ea96ae 100644 --- a/interface.php +++ b/interface.php @@ -33,7 +33,7 @@ <tr> <td align="right">Target:</td> <td> - <textarea id="target_textarea" name="target" cols="80" rows="1" onkeypress="TEXT_handle_keypress(event);" disabled></textarea> + <textarea id="target_textarea" name="target" cols="80" rows="2" onkeypress="TEXT_handle_keypress(event);" disabled></textarea> </td> </tr> </table> @@ -48,6 +48,10 @@ Note that the source word may be distorted. </div> <!-- /Source and target textboxes --> +<!-- Summary --> +<div id="view_summary" style="display:none;margin:2em"><strong>Session finished, thank you!</strong> View <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/summary" target="_blank">summary</a>.</div> +<!-- /Summary --> + <!-- Buttons --> <div> <button id="help_button" class="button" onclick="$('#help').toggle('blind')">Help</button> @@ -65,7 +69,8 @@ Note that the source word may be distorted. Support: <a href="mailto://simianer@cl.uni-heidelberg.de">Mail</a> </p> <p class="tiny">Session: <?php echo $_GET["key"]; ?> | - <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a> + <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/debug" target="_blank">Debug</a> | + <a href="http://postedit.cl.uni-heidelberg.de:<?php echo $db->port; ?>/summary" target="_blank">Summary</a> </p> </div> <!-- /Help --> diff --git a/js/interface.js b/js/interface.js index 9398650..4c9685d 100644 --- a/js/interface.js +++ b/js/interface.js @@ -398,10 +398,11 @@ var request_and_process_next = function () if (data["fin"]) { target_textarea.setAttribute("disabled", "disabled"); status.style.display = "none"; - button.innerHTML = "Session finished, thank you!"; + button.innerHTML = "---"; + $("#view_summary").toggle() $("#raw_source_textarea").html(""); $("#target_textarea").val(""); - $("#target_textarea").attr("rows", 1); + $("#target_textarea").attr("rows", 2); button.setAttribute("disabled", "disabled"); pause_button.setAttribute("disabled", "disabled"); if (current_seg_id.value) @@ -477,7 +478,7 @@ var request_and_process_next = function () $("#seg_"+(id-1)).removeClass("bold"); } if (translation) - target_textarea.rows = Math.round(translation.length/80+0.5); + target_textarea.rows = Math.round(translation.length/80+0.5)+2; //raw_source_textarea.rows = Math.round(raw_source.length/80+0.5); target_textarea.focus(); $("#original_mt").val(target_textarea.value); diff --git a/phrase2_extraction/phrase2_extraction.rb b/phrase2_extraction/phrase2_extraction.rb index 547e0be..01bdae9 100755 --- a/phrase2_extraction/phrase2_extraction.rb +++ b/phrase2_extraction/phrase2_extraction.rb @@ -5,9 +5,9 @@ require 'zipf' module PhrasePhraseExtraction DEBUG = false -MAX_NT = 2 # Chiang: 2 -MAX_SEED_NUM_WORDS = 4 # Chiang: 10 words, -> phrases! -MAX_SRC_SZ = 10 # Chiang: 5 words, -> words! +MAX_NT = 1 # Chiang: 2 +MAX_SEED_NUM_WORDS = 3 # Chiang: 10 words, -> phrases! +MAX_SRC_SZ = 7 # Chiang: 5 words, -> words! FORBID_SRC_ADJACENT_SRC_NT = true # Chiang:true class Rule @@ -544,7 +544,7 @@ def PhrasePhraseExtraction.extract fstart, fend, estart, eend, f, e, a, flen, el } rules.last.rebase_alignment fs, estart fe += 1 - break if has_alignment(a, fe, "src")||fe>=elen + break if has_alignment(a, fe, "src")||fe>=flen end fs -= 1 break has_alignment(a, fs, "src")||fs<0 @@ -649,7 +649,20 @@ def PhrasePhraseExtraction.remove_adjacent_nt rules prev = false end } - b + c = false + prev = false + r.target.each { |i| + if i.is_a? String + if prev + c = true + break + end + prev = true + else + prev = false + end + } + b || c } end @@ -239,49 +239,52 @@ def process_next reply logmsg :server, "post-edit after processing: '#{e.join " "}'" f = [] data["source_raw"].each { |i| f << URI.decode(i) } - # 2.5 new rule extraction - $status = "Extracting rules from post edit" # status - #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" - grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}" - current_grammar_ids = {} - ReadFile.readlines_strip(grammar).each { |r| - s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - current_grammar_ids[s] = true - } - new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true - new_rules_ids = {} - $new_rules.each { |r| - s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - new_rules_ids[s] = true - } - new_rules = new_rules.map { |r| r.as_trule_string } - _ = new_rules.dup - logmsg :server, "# rules before filtering #{new_rules.size}" - new_rules.reject! { |rs| - s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s) - } - $new_rules += new_rules - $new_rules.uniq! { |rs| - splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - } - f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules" - f.write new_rules.join "\n" - f.close - logmsg :server, "# rules after filtering #{new_rules.size}" - add_known_rules = _-new_rules - add_known_rules.reject! { |rs| - s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - new_rules_ids.has_key?(s) - } - f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules" - f.write add_known_rules.join "\n" - f.close - $known_rules += add_known_rules - $known_rules.uniq! { |rs| - splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") - } - add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" } + + if !NOGRAMMAR + # 2.5 new rule extraction + $status = "Extracting rules from post edit" # status + #grammar = "#{WORK_DIR}/g/#{$db['progress']}.grammar" + grammar = "#{SESSION_DIR}/g/grammar.#{$db['progress']}" + current_grammar_ids = {} + ReadFile.readlines_strip(grammar).each { |r| + s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + current_grammar_ids[s] = true + } + new_rules = PhrasePhraseExtraction.extract_rules f, e, data["align"], true + new_rules_ids = {} + $new_rules.each { |r| + s = splitpipe(r.to_s)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + new_rules_ids[s] = true + } + new_rules = new_rules.map { |r| r.as_trule_string } + _ = new_rules.dup + logmsg :server, "# rules before filtering #{new_rules.size}" + new_rules.reject! { |rs| + s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + current_grammar_ids.has_key?(s) || new_rules_ids.has_key?(s) + } + $new_rules += new_rules + $new_rules.uniq! { |rs| + splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + } + f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.new_rules" + f.write new_rules.join "\n" + f.close + logmsg :server, "# rules after filtering #{new_rules.size}" + add_known_rules = _-new_rules + add_known_rules.reject! { |rs| + s = splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + new_rules_ids.has_key?(s) + } + f = WriteFile.new "#{WORK_DIR}/#{$db['progress']}.known_rules" + f.write add_known_rules.join "\n" + f.close + $known_rules += add_known_rules + $known_rules.uniq! { |rs| + splitpipe(rs)[1..2].map{|i|i.strip.lstrip}.join(" ||| ") + } + add_known_rules.each { |r| logmsg :server, "known_rule: '#{r}'" } + end else # text interface post_edit = data["post_edit"] end @@ -328,19 +331,19 @@ def process_next reply send_recv :dtrain, "#{annotated_source} ||| #{post_edit}" # 5. update grammar extractor if !$pregenerated_grammars - # 5a. get forward alignment - source_lc = source.downcase - post_edit_lc = post_edit.downcase - $status = "Aligning post-edit" # status - a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}" + # 5a. get forward alignment + source_lc = source.downcase + post_edit_lc = post_edit.downcase + $status = "Aligning post-edit" # status + a_fwd = send_recv :aligner_fwd, "#{source_lc} ||| #{post_edit_lc}" # 5b. get backward alignment - a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}" - # 5c. symmetrize alignment - a = send_recv :atools, "#{a_fwd} ||| #{a_back}" + a_back = send_recv :aligner_back, "#{source_lc} ||| #{post_edit_lc}" + # 5c. symmetrize alignment + a = send_recv :atools, "#{a_fwd} ||| #{a_back}" # 5d actual extractor - $status = "Updating grammar extractor" # status - msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}" - send_recv :extractor, msg + $status = "Updating grammar extractor" # status + msg = "default_context ||| #{source} ||| #{post_edit} ||| #{a}" + send_recv :extractor, msg end # 6. update database $db['updated'] << true @@ -534,12 +537,18 @@ get '/debug' do # debug view pairwise_ranking_data = JSON.parse ReadFile.read(fn).force_encoding("UTF-8") end + admin = false + if params[:admin] + admin = true + end + haml :debug, :locals => { :data => data, :pairwise_ranking_data => pairwise_ranking_data, \ :progress => $db["progress"]-1, :new_rules => $new_rules, \ :known_rules => $known_rules, \ - :session_key => SESSION_KEY } + :session_key => SESSION_KEY, \ + :admin => admin } end get '/new_rules' do # new/known rules @@ -699,3 +708,27 @@ get '/shutdown' do # stop daemons and shut down server return "stopped all daemons, ready to shutdown" end +get '/summary' do + logmsg :server, "showing summary" + + data = JSON.parse ReadFile.read(DB_FILE).force_encoding("UTF-8") + + ter_scores = [] + data["post_edits"].each_with_index { |pe,j| + f = Tempfile.new "lfpe-summary-pe" + g = Tempfile.new "lfpe-summary-ref" + f.write pe+"\n" + g.write data["references"][j]+"\n" + f.close + g.close + ter_scores << (`#{CDEC}/mteval/fast_score -i #{f.path} -r #{g.path} -m ter 2>/dev/null`.to_f).round(2) + f.unlink + g.unlink + } + + haml :summary, :locals => { :session_key => SESSION_KEY, + :data => data, + :ter_scores => ter_scores } + +end + diff --git a/views/debug.haml b/views/debug.haml index 3c1e006..17f7f86 100644 --- a/views/debug.haml +++ b/views/debug.haml @@ -14,8 +14,9 @@ %p.red %strong No data to show! %ul - %li - %a{ :href => "#controls" } Controls + -if admin + %li + %a{ :href => "#controls" } Controls %li %a{ :href => "#post_edit" } Post-edit %li @@ -31,63 +32,64 @@ %p.tiny Session: #{session_key} /=######################################################################### - %h2#controls Controls - %h3 Reset - %p - %strong [Server reply] - %span#control_reply - %ul - %li - %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress - %li - %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights - %li - %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates - / + -if admin + %h2#controls Controls + %h3 Reset + %p + %strong [Server reply] + %span#control_reply + %ul %li - %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor - %li - %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars - %li - %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules - / + %a.ajax{:tgt => "/reset_progress", :href => "#controls"} Reset progress %li - %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown - %h3 Learning rates & weights - %p Get current weight/rate for specific features: - %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" } - %select#features_type_get - %option{ :value => "/get_weight" } Weight - %option{ :value => "/get_rate" } Rate - %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" } - %button#get_features - Get - %p Set weight/rate for specific features: - %input#features{ :value => "Feature", :style => "text-align:center; width:20em" } - %select#features_type - %option{ :value => "/set_weights" } Weight - %option{ :value => "/set_learning_rates" } Rate - %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" } - %button#set_features - Set - %p - Set rate for feature groups: - %span.tiny (Features unseen in training/tuning have a common default rate.) - %select#feature_groups_get - %option{ :value => "R" } rule ids - %option{ :value => "RB" } rule bigrams - %option{ :value => "Shape" } rule shapes - %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" } - %button#get_feature_groups - Get rate - %br - %select#feature_groups - %option{ :value => "R" } rule ids - %option{ :value => "RB" } rule bigrams - %option{ :value => "Shape" } rule shapes - %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" } - %button#set_feature_groups - Set rate + %a.ajax{:tgt => "/reset_weights", :href => "#controls"} Reset weights + %li + %a.ajax{:tgt => "/reset_learning_rates", :href => "#controls"} Reset learning rates + / + %li + %a.ajax{:tgt => "/reset_extractor", :href => "#controls"} Reset extractor + %li + %a.ajax{:tgt => "/reset_grammars", :href => "#controls"} Reset grammars + %li + %a.ajax{:tgt => "/reset_new_rules", :href => "#controls"} Reset new rules + / + %li + %a.ajax{:tgt => "/shutdown", :href => "#controls"} Initiate shutdown + %h3 Learning rates & weights + %p Get current weight/rate for specific features: + %input#features_get{ :value => "Feature", :style => "text-align:center; width:20em" } + %select#features_type_get + %option{ :value => "/get_weight" } Weight + %option{ :value => "/get_rate" } Rate + %input#features_value_get{ :value => " ", :style => "text-align:right; width:10em" } + %button#get_features + Get + %p Set weight/rate for specific features: + %input#features{ :value => "Feature", :style => "text-align:center; width:20em" } + %select#features_type + %option{ :value => "/set_weights" } Weight + %option{ :value => "/set_learning_rates" } Rate + %input#features_value{ :value => "1e-05", :style => "text-align:right; width:10em" } + %button#set_features + Set + %p + Set rate for feature groups: + %span.tiny (Features unseen in training/tuning have a common default rate.) + %select#feature_groups_get + %option{ :value => "R" } rule ids + %option{ :value => "RB" } rule bigrams + %option{ :value => "Shape" } rule shapes + %input#feature_groups_value_get{ :value => "1e-05", :style => "text-align:right; width:10em" } + %button#get_feature_groups + Get rate + %br + %select#feature_groups + %option{ :value => "R" } rule ids + %option{ :value => "RB" } rule bigrams + %option{ :value => "Shape" } rule shapes + %input#feature_groups_value{ :value => "1e-05", :style => "text-align:right; width:10em" } + %button#set_feature_groups + Set rate /=######################################################################### @@ -109,14 +111,14 @@ %td.left.noborder ##{[0,progress].max} %tr %td.noborder - %strong MT Input: - %td.left.noborder #{data["source_segments"][[0,progress].max]} - %tr - %td.noborder %strong Raw source: %td.left.noborder #{data["raw_source_segments"][[0,progress].max]} %tr %td.noborder + %strong MT Input: + %td.left.noborder #{data["source_segments"][[0,progress].max]} + %tr + %td.noborder %strong Post-edit: %td.left.noborder #{data["post_edits_raw"][progress]} %tr diff --git a/views/summary.haml b/views/summary.haml new file mode 100644 index 0000000..29dc0ad --- /dev/null +++ b/views/summary.haml @@ -0,0 +1,53 @@ +- require 'zipf' +!!! +%html + %head + %title Summary (Session #{session_key}) + %link(rel="stylesheet" type="text/css" href="debug.css") + %script{:src => "http://postedit.cl.uni-heidelberg.de/js/jquery.min.js"} + %script{:src => "http://postedit.cl.uni-heidelberg.de/js/jquery.tablesorter.min.js"} + %script{:src => "http://postedit.cl.uni-heidelberg.de/js/debug.js"} + %body + %h1 Summary + %p (Session #{session_key}) + %p Data is shown in the MT system's formatting. BLEU is calculated without smoothing. + %table + %tr + %td + #{"#"} + %td + %strong Source + %td + %strong + Post-Edit + %td + %strong + Reference + %td + %strong + BLEU + %td + %strong + TER + %td + %strong + Keystrokes + %td + %strong + Mouse actions + %td + %strong + Duration + - data["post_edits"].each_with_index do |pe,j| + %tr + %td.center #{j+1}. + %td #{data["source_segments"][j]} + %td #{pe} + %td #{data["references"][j]} + %td.center #{(BLEU::per_sentence_bleu(pe, [data["references"][j]], 4, 0)*100).round 2}% + %td.center #{ter_scores[j]} + %td.center #{data["count_kbd"][j]} + %td.center #{data["count_click"][j]} + %td.center #{(data["durations"][j]/1000).round 1}s + + |