From 0f10af77140704a969073e0718c2eb3ba1f99ead Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Thu, 2 Apr 2015 13:58:20 +0200
Subject: learning
---
.htaccess | 4 ++
img/ajax-loader-large.gif | Bin 0 -> 19110 bytes
index.php | 170 +++++----------------------------------------
lfpe.css | 19 +++++
lfpe.js | 55 +++++++++++++++
model/cdec.ini | 28 ++++++++
model/dtrain.ini | 4 ++
model/grammar.gz | Bin 0 -> 121 bytes
model/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes
model/run | 4 ++
model/src.gz | Bin 0 -> 70 bytes
model/weights.init | 12 ++++
server.rb | 57 +++++++++++++++
13 files changed, 201 insertions(+), 152 deletions(-)
create mode 100755 .htaccess
create mode 100644 img/ajax-loader-large.gif
create mode 100644 lfpe.css
create mode 100644 lfpe.js
create mode 100644 model/cdec.ini
create mode 100644 model/dtrain.ini
create mode 100644 model/grammar.gz
create mode 100644 model/nc-wmt11.en.srilm.gz
create mode 100755 model/run
create mode 100644 model/src.gz
create mode 100644 model/weights.init
create mode 100644 server.rb
diff --git a/.htaccess b/.htaccess
new file mode 100755
index 0000000..76a4aff
--- /dev/null
+++ b/.htaccess
@@ -0,0 +1,4 @@
+AuthName "Postedit Login"
+AuthType Basic
+AuthUserFile /workspace/post_editing/conf/.htpasswd
+require valid-user
diff --git a/img/ajax-loader-large.gif b/img/ajax-loader-large.gif
new file mode 100644
index 0000000..1fe22da
Binary files /dev/null and b/img/ajax-loader-large.gif differ
diff --git a/index.php b/index.php
index 3354eea..ef1574e 100644
--- a/index.php
+++ b/index.php
@@ -1,163 +1,29 @@
-
-Post-editing application
+
+ Post-editing application
+
+
-
-
-
-
-
-
-There is a source sentence.
-
-
-Please post-edit the SMT output.
-
-
-
-
-
-
+
+ translating
+
+
diff --git a/lfpe.css b/lfpe.css
new file mode 100644
index 0000000..b7959fa
--- /dev/null
+++ b/lfpe.css
@@ -0,0 +1,19 @@
+textarea { /* applies to every textarea on the page */
+  font-size: 20px;
+}
+/* Status line starts hidden; lfpe.js shows it while a request is pending and hides it on load. */
+p#translating_status {
+  display: none;
+}
+/* Shared look for all buttons. */
+button {
+  margin: 1em;
+  padding: 0.25em;
+  background: #fff;
+  font-weight: bold;
+}
+/* Hidden by default. NOTE(review): nothing visible in lfpe.js toggles #fi; confirm it is still used. */
+p#fi {
+  display: none;
+}
+
diff --git a/lfpe.js b/lfpe.js
new file mode 100644
index 0000000..4bbbcee
--- /dev/null
+++ b/lfpe.js
@@ -0,0 +1,55 @@
+function CreateCORSRequest(method, url) {
+ var xhr = new XMLHttpRequest();
+ if ("withCredentials" in xhr) {
+ // XHR for Chrome/Firefox/Opera/Safari.
+ xhr.open(method, url, true);
+ } else if (typeof XDomainRequest != "undefined") {
+ // XDomainRequest for IE.
+ xhr = new XDomainRequest();
+ xhr.open(method, url);
+ } else {
+ // CORS not supported.
+ xhr = null;
+ }
+ return xhr;
+}
+
+function Next(url)
+{
+ url = "http://localhost:31337/next";
+ var pe = document.getElementById("trgt").value;
+ if (pe != "") {
+ var src = document.getElementById("src").value;
+ url += "?example="+src+" %7C%7C%7C "+pe;
+ }
+ document.getElementById("translating_status").style.display = "block";
+ var xhr = CreateCORSRequest('get', url);
+ if (!xhr) {
+ alert('CORS not supported');
+ return;
+ }
+
+ xhr.onload = function() {
+ var x = xhr.responseText.split("\t");
+ if (x == "fi") {
+ document.getElementById("src").style.display = "none";
+ document.getElementById("trgt").style.display = "none";
+ document.getElementById("translating_status").style.display = "none";
+ document.getElementById("next").innerHTML = "Thank you!";
+ document.getElementById("next").disabled = true;
+ } else {
+ document.getElementById("src").value = x[0];
+ document.getElementById("src").cols = x[0].length;
+ document.getElementById("trgt").value = x[1];
+ document.getElementById("trgt").cols = x[1].length;
+ document.getElementById("translating_status").style.display = "none";
+ }
+ };
+
+ xhr.onerror = function() {
+ alert('Error');
+ };
+
+ xhr.send();
+}
+
diff --git a/model/cdec.ini b/model/cdec.ini
new file mode 100644
index 0000000..36f53ac
--- /dev/null
+++ b/model/cdec.ini
@@ -0,0 +1,28 @@
+formalism=scfg
+add_pass_through_rules=true
+scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+grammar=grammar.gz
+feature_function=WordPenalty
+feature_function=KLanguageModel nc-wmt11.en.srilm.gz
+# all currently working feature functions for translation:
+# (with those features active that were used in the ACL paper)
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+feature_function=RuleIdentityFeatures
+feature_function=RuleSourceBigramFeatures
+feature_function=RuleTargetBigramFeatures
+feature_function=RuleShape
+#feature_function=LexicalFeatures 1 1 1
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
+weights=weights.init
diff --git a/model/dtrain.ini b/model/dtrain.ini
new file mode 100644
index 0000000..b6d29bb
--- /dev/null
+++ b/model/dtrain.ini
@@ -0,0 +1,4 @@
+decoder_conf=./cdec.ini # config for cdec
+k=100 # use 100best lists
+N=4 # optimize (approx.) BLEU4
+margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
new file mode 100644
index 0000000..84eb73c
Binary files /dev/null and b/model/grammar.gz differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
new file mode 100644
index 0000000..7ce8105
Binary files /dev/null and b/model/nc-wmt11.en.srilm.gz differ
diff --git a/model/run b/model/run
new file mode 100755
index 0000000..43f20b0
--- /dev/null
+++ b/model/run
@@ -0,0 +1,4 @@
+#!/bin/bash -x
+
+~/src/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc
+
diff --git a/model/src.gz b/model/src.gz
new file mode 100644
index 0000000..9aa247f
Binary files /dev/null and b/model/src.gz differ
diff --git a/model/weights.init b/model/weights.init
new file mode 100644
index 0000000..0d09f9f
--- /dev/null
+++ b/model/weights.init
@@ -0,0 +1,12 @@
+CountEF 0.1
+EgivenFCoherent -0.1
+Glue 0.01
+IsSingletonF -0.01
+IsSingletonFE -0.01
+LanguageModel 0.1
+LanguageModel_OOV -1
+MaxLexFgivenE -0.1
+MaxLexEgivenF -0.1
+PassThrough -0.1
+SampleCountF -0.1
+WordPenalty -0.1
diff --git a/server.rb b/server.rb
new file mode 100644
index 0000000..fac51d1
--- /dev/null
+++ b/server.rb
@@ -0,0 +1,57 @@
+#!/usr/bin/env ruby
+
+require 'sinatra'
+require 'sinatra/cross_origin'
+require 'nanomsg'
+require 'zipf'
+
+set :bind, '0.0.0.0' # accept connections on every interface
+set :port, 31337 # lfpe.js requests http://localhost:31337/next
+# Settings for sinatra/cross_origin; each route below opts in by calling cross_origin.
+set :allow_origin, :any
+set :allow_methods, [:get, :post, :options]
+set :allow_credentials, true
+set :max_age, "1728000"
+set :expose_headers, ['Content-Type']
+# Pair socket shared by all routes; model/run starts dtrain_net_interface with this same address.
+sock = NanoMsg::PairSocket.new
+addr = "ipc:///tmp/dtrain.ipc"
+sock.bind addr
+# Entries served by /next (presumably one source segment per line; path is relative to the working directory); input_ keeps a copy for /reset.
+input = ReadFile.readlines_strip "model/src.gz"
+input_ = Array.new input
+
+get '/' do # placeholder root route; responds with a fixed message
+ cross_origin
+ "Nothing to see here."
+end
+
+get '/next' do
+ cross_origin
+ if params[:example]
+ sock.send params[:example].strip
+ puts params.to_s
+ sock.recv # dummy
+ end
+ src = input.shift
+ if !src
+ puts "end of input, sending 'fi'"
+ "fi"
+ else
+ puts "sending source '#{src}' ..."
+ sock.send "act:translate ||| #{src}"
+ puts "done"
+ sleep 1
+ puts "waiting for translation ..."
+ t = sock.recv
+ puts "got translation '#{t}'"
+ "#{src}\t#{t}"
+ end
+end
+
+get '/reset' do # restart serving from the first entry of the saved copy
+ cross_origin
+ input = Array.new input_ # rebinds the outer local (the block closes over it) to a fresh copy of input_
+ "done"
+end
+
--
cgit v1.2.3