diff options
-rwxr-xr-x | .htaccess | 4 | ||||
-rw-r--r-- | img/ajax-loader-large.gif | bin | 0 -> 19110 bytes | |||
-rw-r--r-- | index.php | 170 | ||||
-rw-r--r-- | lfpe.css | 19 | ||||
-rw-r--r-- | lfpe.js | 55 | ||||
-rw-r--r-- | model/cdec.ini | 28 | ||||
-rw-r--r-- | model/dtrain.ini | 4 | ||||
-rw-r--r-- | model/grammar.gz | bin | 0 -> 121 bytes | |||
-rw-r--r-- | model/nc-wmt11.en.srilm.gz | bin | 0 -> 16017291 bytes | |||
-rwxr-xr-x | model/run | 4 | ||||
-rw-r--r-- | model/src.gz | bin | 0 -> 70 bytes | |||
-rw-r--r-- | model/weights.init | 12 | ||||
-rw-r--r-- | server.rb | 57 |
13 files changed, 201 insertions, 152 deletions
diff --git a/.htaccess b/.htaccess new file mode 100755 index 0000000..76a4aff --- /dev/null +++ b/.htaccess @@ -0,0 +1,4 @@ +AuthName "Postedit Login" +AuthType Basic +AuthUserFile /workspace/post_editing/conf/.htpasswd +require valid-user diff --git a/img/ajax-loader-large.gif b/img/ajax-loader-large.gif Binary files differnew file mode 100644 index 0000000..1fe22da --- /dev/null +++ b/img/ajax-loader-large.gif @@ -1,163 +1,29 @@ <html> <head> -<meta charset="utf-8" /> -<title>Post-editing application</title> + <meta charset="utf-8" /> + <title>Post-editing application</title> + <script src="lfpe.js"></script> + <link rel="stylesheet" type="text/css" href="lfpe.css" /> </head> -<body onload="alertmessage()"> -<script type="text/javascript"> -var count= 1; -var count1; +<body onload="Next()"> -// welcome message -function alertmessage() -{ -alert("Welcome to Post-editing app! \n Good luck! =) "); -} +<div> + <textarea id="src" name="source" cols="1" rows="1" readonly></textarea> +</div> -document.write("Please correct the machine translation from German to English.") -document.writeln("<br >"); -document.writeln("<br >"); +<div> + <textarea id="trgt" name="target" cols="1" rows="1"></textarea> +</div> +<p> + <button id="next" type="button" onclick="Next()">Next</button> +</p> -function GetSourceData() -{ -var xmlhttp; -if (window.XMLHttpRequest) - {// code for IE7+, Firefox, Chrome, Opera, Safari - xmlhttp=new XMLHttpRequest(); - } -else - {// code for IE6, IE5 - xmlhttp=new ActiveXObject("Microsoft.XMLHTTP"); - } -xmlhttp.onreadystatechange=function() - { - if (xmlhttp.readyState==4 && xmlhttp.status==200) - { - document.getElementById("src").innerHTML=xmlhttp.responseText; - } - } -xmlhttp.open("POST","server_js1.php",true); -xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded"); -xmlhttp.send("number="+count); -} - -function GetTargetData() -{ -var xmlhttp; -if (window.XMLHttpRequest) - {// code for IE7+, Firefox, Chrome, Opera, Safari - xmlhttp=new XMLHttpRequest(); - } -else - {// code for IE6, IE5 - xmlhttp=new ActiveXObject("Microsoft.XMLHTTP"); - } -xmlhttp.onreadystatechange=function() - { - if (xmlhttp.readyState==4 && xmlhttp.status==200) - { - document.getElementById('trgt').value= xmlhttp.responseText; - } - } -xmlhttp.open("POST","server_js1.php",true); -xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded"); -xmlhttp.send("number_trgt="+count); -} - - -function SubmitData() -{ -var xmlhttp; -if (window.XMLHttpRequest) - {// code for IE7+, Firefox, Chrome, Opera, Safari - xmlhttp=new XMLHttpRequest(); - } -else - {// code for IE6, IE5 - xmlhttp=new ActiveXObject("Microsoft.XMLHTTP"); - } -xmlhttp.onreadystatechange=function() - { - if (xmlhttp.readyState==4 && xmlhttp.status==200) - { - alert("Thank you for your submission: " +xmlhttp.responseText); - } - } -xmlhttp.open("POST","server_js1.php",true); -xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded"); -xmlhttp.send("postedit="+document.getElementById('trgt').value); - -} - - -function SubmitAndGetData() -{ - SubmitData(); - GetSourceData(); - GetTargetData(); - count++; -} - -function GetTargetDataAgain() -{ -var xmlhttp; -if (window.XMLHttpRequest) - {// code for IE7+, Firefox, Chrome, Opera, Safari - xmlhttp=new XMLHttpRequest(); - } -else - {// code for IE6, IE5 - xmlhttp=new ActiveXObject("Microsoft.XMLHTTP"); - } -xmlhttp.onreadystatechange=function() - { - if (xmlhttp.readyState==4 && xmlhttp.status==200) - { - document.getElementById('trgt').value= xmlhttp.responseText; - } - } -xmlhttp.open("POST","server_js1.php",true); -xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded"); -if (count==1) -{ - count1 = -5; -} -else -{ - count1 = count -1; -} -xmlhttp.send("number_trgt="+count1); -} - -</script> - - - - -<h3>There is a source sentence.</h3> -<form action="textarea.htm"> -<textarea id="src" style="font-size: 20px" name="source" cols="130" rows="2" readonly><?php $mySourceFile = fopen("source.txt", "r") or die("Unable to open file!"); -echo fgets($mySourceFile); -fclose($mySourceFile); -?></textarea> -</form><br><br/> - -<h3>Please post-edit the SMT output.</h3> -<form action="textarea.htm"> -<textarea id="trgt" style="font-size: 20px" name="target" cols="130" rows="2"><?php -$myTargetFile = fopen("target.txt", "r") or die("Unable to open file!"); -$SMToutput = fgets($myTargetFile); -echo $SMToutput; -fclose($myTargetFile); -?></textarea><br><br/> -</form> - - -<button type="button" onclick="GetTargetDataAgain()">Revert to SMT output</button> - -<button type="button" onclick="SubmitAndGetData()">Submit postedit</button> +<p id="translating_status"> + <strong>translating</strong> <img src="img/ajax-loader-large.gif" width="20px" /> +</p> </body> </html> + diff --git a/lfpe.css b/lfpe.css new file mode 100644 index 0000000..b7959fa --- /dev/null +++ b/lfpe.css @@ -0,0 +1,19 @@ +textarea { + font-size: 20px +} + +p#translating_status { + display: none; +} + +button { + margin: 1em; + padding: .25em; + background: #fff; + font-weight: bold +} + +p#fi { + display: none; +} + @@ -0,0 +1,55 @@ +function CreateCORSRequest(method, url) { + var xhr = new XMLHttpRequest(); + if ("withCredentials" in xhr) { + // XHR for Chrome/Firefox/Opera/Safari. + xhr.open(method, url, true); + } else if (typeof XDomainRequest != "undefined") { + // XDomainRequest for IE. + xhr = new XDomainRequest(); + xhr.open(method, url); + } else { + // CORS not supported. + xhr = null; + } + return xhr; +} + +function Next(url) +{ + url = "http://localhost:31337/next"; + var pe = document.getElementById("trgt").value; + if (pe != "") { + var src = document.getElementById("src").value; + url += "?example="+src+" %7C%7C%7C "+pe; + } + document.getElementById("translating_status").style.display = "block"; + var xhr = CreateCORSRequest('get', url); + if (!xhr) { + alert('CORS not supported'); + return; + } + + xhr.onload = function() { + var x = xhr.responseText.split("\t"); + if (x == "fi") { + document.getElementById("src").style.display = "none"; + document.getElementById("trgt").style.display = "none"; + document.getElementById("translating_status").style.display = "none"; + document.getElementById("next").innerHTML = "Thank you!"; + document.getElementById("next").disabled = true; + } else { + document.getElementById("src").value = x[0]; + document.getElementById("src").cols = x[0].length; + document.getElementById("trgt").value = x[1]; + document.getElementById("trgt").cols = x[1].length; + document.getElementById("translating_status").style.display = "none"; + } + }; + + xhr.onerror = function() { + alert('Error'); + }; + + xhr.send(); +} + diff --git a/model/cdec.ini b/model/cdec.ini new file mode 100644 index 0000000..36f53ac --- /dev/null +++ b/model/cdec.ini @@ -0,0 +1,28 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +grammar=grammar.gz +feature_function=WordPenalty +feature_function=KLanguageModel nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: +# (with those features active that were used in the ACL paper) +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +feature_function=RuleIdentityFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures +feature_function=RuleShape +#feature_function=LexicalFeatures 1 1 1 +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures +weights=weights.init diff --git a/model/dtrain.ini b/model/dtrain.ini new file mode 100644 index 0000000..b6d29bb --- /dev/null +++ b/model/dtrain.ini @@ -0,0 +1,4 @@ +decoder_conf=./cdec.ini # config for cdec +k=100 # use 100best lists +N=4 # optimize (approx.) BLEU4 +margin=0.0 # perceptron's margin diff --git a/model/grammar.gz b/model/grammar.gz Binary files differnew file mode 100644 index 0000000..84eb73c --- /dev/null +++ b/model/grammar.gz diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz Binary files differnew file mode 100644 index 0000000..7ce8105 --- /dev/null +++ b/model/nc-wmt11.en.srilm.gz diff --git a/model/run b/model/run new file mode 100755 index 0000000..43f20b0 --- /dev/null +++ b/model/run @@ -0,0 +1,4 @@ +#!/bin/bash -x + +~/src/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc + diff --git a/model/src.gz b/model/src.gz Binary files differnew file mode 100644 index 0000000..9aa247f --- /dev/null +++ b/model/src.gz diff --git a/model/weights.init b/model/weights.init new file mode 100644 index 0000000..0d09f9f --- /dev/null +++ b/model/weights.init @@ -0,0 +1,12 @@ +CountEF 0.1 +EgivenFCoherent -0.1 +Glue 0.01 +IsSingletonF -0.01 +IsSingletonFE -0.01 +LanguageModel 0.1 +LanguageModel_OOV -1 +MaxLexFgivenE -0.1 +MaxLexEgivenF -0.1 +PassThrough -0.1 +SampleCountF -0.1 +WordPenalty -0.1 diff --git a/server.rb b/server.rb new file mode 100644 index 0000000..fac51d1 --- /dev/null +++ b/server.rb @@ -0,0 +1,57 @@ +#!/usr/bin/env ruby + +require 'sinatra' +require 'sinatra/cross_origin' +require 'nanomsg' +require 'zipf' + +set :bind, '0.0.0.0' +set :port, 31337 + +set :allow_origin, :any +set :allow_methods, [:get, :post, :options] +set :allow_credentials, true +set :max_age, "1728000" +set :expose_headers, ['Content-Type'] + +sock = NanoMsg::PairSocket.new +addr = "ipc:///tmp/dtrain.ipc" +sock.bind addr + +input = ReadFile.readlines_strip "model/src.gz" +input_ = Array.new input + +get '/' do + cross_origin + "Nothing to see here." +end + +get '/next' do + cross_origin + if params[:example] + sock.send params[:example].strip + puts params.to_s + sock.recv # dummy + end + src = input.shift + if !src + puts "end of input, sending 'fi'" + "fi" + else + puts "sending source '#{src}' ..." + sock.send "act:translate ||| #{src}" + puts "done" + sleep 1 + puts "waiting for translation ..." + t = sock.recv + puts "got translation '#{t}'" + "#{src}\t#{t}" + end +end + +get '/reset' do + cross_origin + input = Array.new input_ + "done" +end + |