summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2015-04-02 13:58:20 +0200
committer Patrick Simianer <p@simianer.de> 2015-04-02 13:58:20 +0200
commit 0f10af77140704a969073e0718c2eb3ba1f99ead (patch)
tree 0c1093b81171ae5e1d8dbcc02dcfbbe44ac72ba3
parent c2ce37c7c49bc17420a1bbe8a0ae50aa10e78207 (diff)
learning
-rwxr-xr-x.htaccess4
-rw-r--r--img/ajax-loader-large.gifbin0 -> 19110 bytes
-rw-r--r--index.php170
-rw-r--r--lfpe.css19
-rw-r--r--lfpe.js55
-rw-r--r--model/cdec.ini28
-rw-r--r--model/dtrain.ini4
-rw-r--r--model/grammar.gzbin0 -> 121 bytes
-rw-r--r--model/nc-wmt11.en.srilm.gzbin0 -> 16017291 bytes
-rwxr-xr-xmodel/run4
-rw-r--r--model/src.gzbin0 -> 70 bytes
-rw-r--r--model/weights.init12
-rw-r--r--server.rb57
13 files changed, 201 insertions, 152 deletions
diff --git a/.htaccess b/.htaccess
new file mode 100755
index 0000000..76a4aff
--- /dev/null
+++ b/.htaccess
@@ -0,0 +1,4 @@
+AuthName "Postedit Login"
+AuthType Basic
+AuthUserFile /workspace/post_editing/conf/.htpasswd
+require valid-user
diff --git a/img/ajax-loader-large.gif b/img/ajax-loader-large.gif
new file mode 100644
index 0000000..1fe22da
--- /dev/null
+++ b/img/ajax-loader-large.gif
Binary files differ
diff --git a/index.php b/index.php
index 3354eea..ef1574e 100644
--- a/index.php
+++ b/index.php
@@ -1,163 +1,29 @@
<html>
<head>
-<meta charset="utf-8" />
-<title>Post-editing application</title>
+ <meta charset="utf-8" />
+ <title>Post-editing application</title>
+ <script src="lfpe.js"></script>
+ <link rel="stylesheet" type="text/css" href="lfpe.css" />
</head>
-<body onload="alertmessage()">
-<script type="text/javascript">
-var count= 1;
-var count1;
+<body onload="Next()">
-// welcome message
-function alertmessage()
-{
-alert("Welcome to Post-editing app! \n Good luck! =) ");
-}
+<div>
+ <textarea id="src" name="source" cols="1" rows="1" readonly></textarea>
+</div>
-document.write("Please correct the machine translation from German to English.")
-document.writeln("<br >");
-document.writeln("<br >");
+<div>
+ <textarea id="trgt" name="target" cols="1" rows="1"></textarea>
+</div>
+<p>
+ <button id="next" type="button" onclick="Next()">Next</button>
+</p>
-function GetSourceData()
-{
-var xmlhttp;
-if (window.XMLHttpRequest)
- {// code for IE7+, Firefox, Chrome, Opera, Safari
- xmlhttp=new XMLHttpRequest();
- }
-else
- {// code for IE6, IE5
- xmlhttp=new ActiveXObject("Microsoft.XMLHTTP");
- }
-xmlhttp.onreadystatechange=function()
- {
- if (xmlhttp.readyState==4 && xmlhttp.status==200)
- {
- document.getElementById("src").innerHTML=xmlhttp.responseText;
- }
- }
-xmlhttp.open("POST","server_js1.php",true);
-xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded");
-xmlhttp.send("number="+count);
-}
-
-function GetTargetData()
-{
-var xmlhttp;
-if (window.XMLHttpRequest)
- {// code for IE7+, Firefox, Chrome, Opera, Safari
- xmlhttp=new XMLHttpRequest();
- }
-else
- {// code for IE6, IE5
- xmlhttp=new ActiveXObject("Microsoft.XMLHTTP");
- }
-xmlhttp.onreadystatechange=function()
- {
- if (xmlhttp.readyState==4 && xmlhttp.status==200)
- {
- document.getElementById('trgt').value= xmlhttp.responseText;
- }
- }
-xmlhttp.open("POST","server_js1.php",true);
-xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded");
-xmlhttp.send("number_trgt="+count);
-}
-
-
-function SubmitData()
-{
-var xmlhttp;
-if (window.XMLHttpRequest)
- {// code for IE7+, Firefox, Chrome, Opera, Safari
- xmlhttp=new XMLHttpRequest();
- }
-else
- {// code for IE6, IE5
- xmlhttp=new ActiveXObject("Microsoft.XMLHTTP");
- }
-xmlhttp.onreadystatechange=function()
- {
- if (xmlhttp.readyState==4 && xmlhttp.status==200)
- {
- alert("Thank you for your submission: " +xmlhttp.responseText);
- }
- }
-xmlhttp.open("POST","server_js1.php",true);
-xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded");
-xmlhttp.send("postedit="+document.getElementById('trgt').value);
-
-}
-
-
-function SubmitAndGetData()
-{
- SubmitData();
- GetSourceData();
- GetTargetData();
- count++;
-}
-
-function GetTargetDataAgain()
-{
-var xmlhttp;
-if (window.XMLHttpRequest)
- {// code for IE7+, Firefox, Chrome, Opera, Safari
- xmlhttp=new XMLHttpRequest();
- }
-else
- {// code for IE6, IE5
- xmlhttp=new ActiveXObject("Microsoft.XMLHTTP");
- }
-xmlhttp.onreadystatechange=function()
- {
- if (xmlhttp.readyState==4 && xmlhttp.status==200)
- {
- document.getElementById('trgt').value= xmlhttp.responseText;
- }
- }
-xmlhttp.open("POST","server_js1.php",true);
-xmlhttp.setRequestHeader("Content-type","application/x-www-form-urlencoded");
-if (count==1)
-{
- count1 = -5;
-}
-else
-{
- count1 = count -1;
-}
-xmlhttp.send("number_trgt="+count1);
-}
-
-</script>
-
-
-
-
-<h3>There is a source sentence.</h3>
-<form action="textarea.htm">
-<textarea id="src" style="font-size: 20px" name="source" cols="130" rows="2" readonly><?php $mySourceFile = fopen("source.txt", "r") or die("Unable to open file!");
-echo fgets($mySourceFile);
-fclose($mySourceFile);
-?></textarea>
-</form><br><br/>
-
-<h3>Please post-edit the SMT output.</h3>
-<form action="textarea.htm">
-<textarea id="trgt" style="font-size: 20px" name="target" cols="130" rows="2"><?php
-$myTargetFile = fopen("target.txt", "r") or die("Unable to open file!");
-$SMToutput = fgets($myTargetFile);
-echo $SMToutput;
-fclose($myTargetFile);
-?></textarea><br><br/>
-</form>
-
-
-<button type="button" onclick="GetTargetDataAgain()">Revert to SMT output</button>
-
-<button type="button" onclick="SubmitAndGetData()">Submit postedit</button>
+<p id="translating_status">
+ <strong>translating</strong> <img src="img/ajax-loader-large.gif" width="20px" />
+</p>
</body>
</html>
+
diff --git a/lfpe.css b/lfpe.css
new file mode 100644
index 0000000..b7959fa
--- /dev/null
+++ b/lfpe.css
@@ -0,0 +1,19 @@
+textarea {
+ font-size: 20px
+}
+
+p#translating_status {
+ display: none;
+}
+
+button {
+ margin: 1em;
+ padding: .25em;
+ background: #fff;
+ font-weight: bold
+}
+
+p#fi {
+ display: none;
+}
+
diff --git a/lfpe.js b/lfpe.js
new file mode 100644
index 0000000..4bbbcee
--- /dev/null
+++ b/lfpe.js
@@ -0,0 +1,55 @@
+function CreateCORSRequest(method, url) {
+ var xhr = new XMLHttpRequest();
+ if ("withCredentials" in xhr) {
+ // XHR for Chrome/Firefox/Opera/Safari.
+ xhr.open(method, url, true);
+ } else if (typeof XDomainRequest != "undefined") {
+ // XDomainRequest for IE.
+ xhr = new XDomainRequest();
+ xhr.open(method, url);
+ } else {
+ // CORS not supported.
+ xhr = null;
+ }
+ return xhr;
+}
+
+function Next(url)
+{
+ url = "http://localhost:31337/next";
+ var pe = document.getElementById("trgt").value;
+ if (pe != "") {
+ var src = document.getElementById("src").value;
+ url += "?example="+src+" %7C%7C%7C "+pe;
+ }
+ document.getElementById("translating_status").style.display = "block";
+ var xhr = CreateCORSRequest('get', url);
+ if (!xhr) {
+ alert('CORS not supported');
+ return;
+ }
+
+ xhr.onload = function() {
+ var x = xhr.responseText.split("\t");
+ if (x == "fi") {
+ document.getElementById("src").style.display = "none";
+ document.getElementById("trgt").style.display = "none";
+ document.getElementById("translating_status").style.display = "none";
+ document.getElementById("next").innerHTML = "Thank you!";
+ document.getElementById("next").disabled = true;
+ } else {
+ document.getElementById("src").value = x[0];
+ document.getElementById("src").cols = x[0].length;
+ document.getElementById("trgt").value = x[1];
+ document.getElementById("trgt").cols = x[1].length;
+ document.getElementById("translating_status").style.display = "none";
+ }
+ };
+
+ xhr.onerror = function() {
+ alert('Error');
+ };
+
+ xhr.send();
+}
+
diff --git a/model/cdec.ini b/model/cdec.ini
new file mode 100644
index 0000000..36f53ac
--- /dev/null
+++ b/model/cdec.ini
@@ -0,0 +1,28 @@
+formalism=scfg
+add_pass_through_rules=true
+scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+grammar=grammar.gz
+feature_function=WordPenalty
+feature_function=KLanguageModel nc-wmt11.en.srilm.gz
+# all currently working feature functions for translation:
+# (with those features active that were used in the ACL paper)
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+feature_function=RuleIdentityFeatures
+feature_function=RuleSourceBigramFeatures
+feature_function=RuleTargetBigramFeatures
+feature_function=RuleShape
+#feature_function=LexicalFeatures 1 1 1
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
+weights=weights.init
diff --git a/model/dtrain.ini b/model/dtrain.ini
new file mode 100644
index 0000000..b6d29bb
--- /dev/null
+++ b/model/dtrain.ini
@@ -0,0 +1,4 @@
+decoder_conf=./cdec.ini # config for cdec
+k=100 # use 100best lists
+N=4 # optimize (approx.) BLEU4
+margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
new file mode 100644
index 0000000..84eb73c
--- /dev/null
+++ b/model/grammar.gz
Binary files differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
new file mode 100644
index 0000000..7ce8105
--- /dev/null
+++ b/model/nc-wmt11.en.srilm.gz
Binary files differ
diff --git a/model/run b/model/run
new file mode 100755
index 0000000..43f20b0
--- /dev/null
+++ b/model/run
@@ -0,0 +1,4 @@
+#!/bin/bash -x
+
+~/src/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc
+
diff --git a/model/src.gz b/model/src.gz
new file mode 100644
index 0000000..9aa247f
--- /dev/null
+++ b/model/src.gz
Binary files differ
diff --git a/model/weights.init b/model/weights.init
new file mode 100644
index 0000000..0d09f9f
--- /dev/null
+++ b/model/weights.init
@@ -0,0 +1,12 @@
+CountEF 0.1
+EgivenFCoherent -0.1
+Glue 0.01
+IsSingletonF -0.01
+IsSingletonFE -0.01
+LanguageModel 0.1
+LanguageModel_OOV -1
+MaxLexFgivenE -0.1
+MaxLexEgivenF -0.1
+PassThrough -0.1
+SampleCountF -0.1
+WordPenalty -0.1
diff --git a/server.rb b/server.rb
new file mode 100644
index 0000000..fac51d1
--- /dev/null
+++ b/server.rb
@@ -0,0 +1,57 @@
+#!/usr/bin/env ruby
+
+require 'sinatra'
+require 'sinatra/cross_origin'
+require 'nanomsg'
+require 'zipf'
+
+# Listen on every network interface, on port 31337 (the port lfpe.js
+# requests "/next" from).
+set :bind, '0.0.0.0'
+set :port, 31337
+
+# Settings consumed by sinatra/cross_origin when a route calls cross_origin.
+set :allow_origin, :any
+set :allow_methods, [:get, :post, :options]
+set :allow_credentials, true
+set :max_age, "1728000"
+set :expose_headers, ['Content-Type']
+
+# Pair socket bound to the same IPC address that model/run passes to
+# dtrain_net_interface via -a.
+sock = NanoMsg::PairSocket.new
+addr = "ipc:///tmp/dtrain.ipc"
+sock.bind addr
+
+# Source sentences handed out one at a time by the /next route.
+# NOTE(review): ReadFile.readlines_strip is presumably provided by the zipf
+# gem and assumed to return the file's lines with whitespace stripped — confirm.
+input = ReadFile.readlines_strip "model/src.gz"
+# Untouched copy of the input, used by the /reset route to restore the queue.
+input_ = Array.new input
+
+get '/' do
+ cross_origin
+ "Nothing to see here."
+end
+
+get '/next' do
+ cross_origin
+ if params[:example]
+ sock.send params[:example].strip
+ puts params.to_s
+ sock.recv # dummy
+ end
+ src = input.shift
+ if !src
+ puts "end of input, sending 'fi'"
+ "fi"
+ else
+ puts "sending source '#{src}' ..."
+ sock.send "act:translate ||| #{src}"
+ puts "done"
+ sleep 1
+ puts "waiting for translation ..."
+ t = sock.recv
+ puts "got translation '#{t}'"
+ "#{src}\t#{t}"
+ end
+end
+
+get '/reset' do
+ cross_origin
+ input = Array.new input_
+ "done"
+end
+