From 0f10af77140704a969073e0718c2eb3ba1f99ead Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Thu, 2 Apr 2015 13:58:20 +0200 Subject: learning --- .htaccess | 4 ++ img/ajax-loader-large.gif | Bin 0 -> 19110 bytes index.php | 170 +++++---------------------------------------- lfpe.css | 19 +++++ lfpe.js | 55 +++++++++++++++ model/cdec.ini | 28 ++++++++ model/dtrain.ini | 4 ++ model/grammar.gz | Bin 0 -> 121 bytes model/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes model/run | 4 ++ model/src.gz | Bin 0 -> 70 bytes model/weights.init | 12 ++++ server.rb | 57 +++++++++++++++ 13 files changed, 201 insertions(+), 152 deletions(-) create mode 100755 .htaccess create mode 100644 img/ajax-loader-large.gif create mode 100644 lfpe.css create mode 100644 lfpe.js create mode 100644 model/cdec.ini create mode 100644 model/dtrain.ini create mode 100644 model/grammar.gz create mode 100644 model/nc-wmt11.en.srilm.gz create mode 100755 model/run create mode 100644 model/src.gz create mode 100644 model/weights.init create mode 100644 server.rb diff --git a/.htaccess b/.htaccess new file mode 100755 index 0000000..76a4aff --- /dev/null +++ b/.htaccess @@ -0,0 +1,4 @@ +AuthName "Postedit Login" +AuthType Basic +AuthUserFile /workspace/post_editing/conf/.htpasswd +require valid-user diff --git a/img/ajax-loader-large.gif b/img/ajax-loader-large.gif new file mode 100644 index 0000000..1fe22da Binary files /dev/null and b/img/ajax-loader-large.gif differ diff --git a/index.php b/index.php index 3354eea..ef1574e 100644 --- a/index.php +++ b/index.php @@ -1,163 +1,29 @@
- -+ translating +
+
diff --git a/lfpe.css b/lfpe.css
new file mode 100644
index 0000000..b7959fa
--- /dev/null
+++ b/lfpe.css
@@ -0,0 +1,19 @@
+textarea {
+  font-size: 20px
+}
+
+p#translating_status {
+  display: none;
+}
+
+button {
+  margin: 1em;
+  padding: .25em;
+  background: #fff;
+  font-weight: bold
+}
+
+p#fi {
+  display: none;
+}
+
diff --git a/lfpe.js b/lfpe.js
new file mode 100644
index 0000000..4bbbcee
--- /dev/null
+++ b/lfpe.js
@@ -0,0 +1,75 @@
+// Open a cross-origin request for `url`, choosing whichever request object
+// the browser provides. Returns the opened request, or null when the
+// browser supports neither mechanism.
+function CreateCORSRequest(method, url) {
+  var xhr = new XMLHttpRequest();
+  if ("withCredentials" in xhr) {
+    // XHR for Chrome/Firefox/Opera/Safari.
+    xhr.open(method, url, true);
+  } else if (typeof XDomainRequest != "undefined") {
+    // XDomainRequest for IE.
+    xhr = new XDomainRequest();
+    xhr.open(method, url);
+  } else {
+    // CORS not supported.
+    xhr = null;
+  }
+  return xhr;
+}
+
+// Send the current post-edit (if there is one) to the server and show the
+// next source segment together with its translation.
+//
+// The `url` argument is overwritten: requests always go to the /next
+// endpoint on localhost:31337.
+function Next(url)
+{
+  url = "http://localhost:31337/next";
+  var statusLine = document.getElementById("translating_status");
+  var srcBox = document.getElementById("src");
+  var trgtBox = document.getElementById("trgt");
+  var nextButton = document.getElementById("next");
+  var pe = trgtBox.value;
+  if (pe != "") {
+    // Encode the whole example so that characters such as '&', '#', '+' or
+    // '%' in the text cannot truncate or alter the query string.
+    url += "?example="+encodeURIComponent(srcBox.value+" ||| "+pe);
+  }
+  statusLine.style.display = "block";
+  var xhr = CreateCORSRequest('get', url);
+  if (!xhr) {
+    statusLine.style.display = "none";
+    alert('CORS not supported');
+    return;
+  }
+
+  xhr.onload = function() {
+    statusLine.style.display = "none";
+    var x = xhr.responseText.split("\t");
+    if (x.length == 1 && x[0] == "fi") {
+      // A bare "fi" response means there is nothing left to translate.
+      srcBox.style.display = "none";
+      trgtBox.style.display = "none";
+      nextButton.innerHTML = "Thank you!";
+      nextButton.disabled = true;
+    } else if (x.length == 1) {
+      // No tab in the response, so there is no translation to display.
+      alert('Error');
+    } else {
+      srcBox.value = x[0];
+      // Keep cols at least 1, even for an empty string.
+      srcBox.cols = Math.max(x[0].length, 1);
+      trgtBox.value = x[1];
+      trgtBox.cols = Math.max(x[1].length, 1);
+    }
+  };
+
+  xhr.onerror = function() {
+    // Do not leave the "translating" indicator up after a failure.
+    statusLine.style.display = "none";
+    alert('Error');
+  };
+
+  xhr.send();
+}
+
+diff 
--git a/model/cdec.ini b/model/cdec.ini new file mode 100644 index 0000000..36f53ac --- /dev/null +++ b/model/cdec.ini @@ -0,0 +1,28 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +grammar=grammar.gz +feature_function=WordPenalty +feature_function=KLanguageModel nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: +# (with those features active that were used in the ACL paper) +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +feature_function=RuleIdentityFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures +feature_function=RuleShape +#feature_function=LexicalFeatures 1 1 1 +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures +weights=weights.init diff --git a/model/dtrain.ini b/model/dtrain.ini new file mode 100644 index 0000000..b6d29bb --- /dev/null +++ b/model/dtrain.ini @@ -0,0 +1,4 @@ +decoder_conf=./cdec.ini # config for cdec +k=100 # use 100best lists +N=4 # optimize (approx.) 
BLEU4
+margin=0.0 # perceptron's margin
diff --git a/model/grammar.gz b/model/grammar.gz
new file mode 100644
index 0000000..84eb73c
Binary files /dev/null and b/model/grammar.gz differ
diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz
new file mode 100644
index 0000000..7ce8105
Binary files /dev/null and b/model/nc-wmt11.en.srilm.gz differ
diff --git a/model/run b/model/run
new file mode 100755
index 0000000..43f20b0
--- /dev/null
+++ b/model/run
@@ -0,0 +1,4 @@
+#!/bin/bash -x
+
+~/src/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc
+
diff --git a/model/src.gz b/model/src.gz
new file mode 100644
index 0000000..9aa247f
Binary files /dev/null and b/model/src.gz differ
diff --git a/model/weights.init b/model/weights.init
new file mode 100644
index 0000000..0d09f9f
--- /dev/null
+++ b/model/weights.init
@@ -0,0 +1,12 @@
+CountEF 0.1
+EgivenFCoherent -0.1
+Glue 0.01
+IsSingletonF -0.01
+IsSingletonFE -0.01
+LanguageModel 0.1
+LanguageModel_OOV -1
+MaxLexFgivenE -0.1
+MaxLexEgivenF -0.1
+PassThrough -0.1
+SampleCountF -0.1
+WordPenalty -0.1
diff --git a/server.rb b/server.rb
new file mode 100644
index 0000000..fac51d1
--- /dev/null
+++ b/server.rb
@@ -0,0 +1,74 @@
+#!/usr/bin/env ruby
+
+# HTTP front end for the post-editing page: hands out the source segments
+# one at a time, asks the process listening on the nanomsg pair socket for
+# a translation of each, and forwards post-edited examples to it.
+
+require 'sinatra'
+require 'sinatra/cross_origin'
+require 'nanomsg'
+require 'zipf'
+
+set :bind, '0.0.0.0'
+set :port, 31337
+# All routes share one socket and one input queue, and each exchange on the
+# socket is a send followed by a receive, so requests must not interleave.
+set :lock, true
+
+# NOTE(review): any origin plus credentials is very permissive -- confirm
+# that this is intended.
+set :allow_origin, :any
+set :allow_methods, [:get, :post, :options]
+set :allow_credentials, true
+set :max_age, "1728000"
+set :expose_headers, ['Content-Type']
+
+sock = NanoMsg::PairSocket.new
+addr = "ipc:///tmp/dtrain.ipc"
+sock.bind addr
+
+# Segments still to be served, and an untouched copy used by /reset.
+input = ReadFile.readlines_strip "model/src.gz"
+input_ = Array.new input
+
+get '/' do
+  cross_origin
+  "Nothing to see here."
+end
+
+# Takes an optional finished example ("source ||| post-edit") and passes it
+# on, then answers with the next source and its translation separated by a
+# tab, or with "fi" when no input is left.
+get '/next' do
+  cross_origin
+  example = params[:example].to_s.strip
+  # Skip blank examples instead of sending an empty message and waiting for
+  # a reply to it.
+  unless example.empty?
+    sock.send example
+    puts params.to_s
+    sock.recv # dummy
+  end
+  src = input.shift
+  if src
+    puts "sending source '#{src}' ..."
+    sock.send "act:translate ||| #{src}"
+    puts "done"
+    sleep 1
+    puts "waiting for translation ..."
+    t = sock.recv
+    puts "got translation '#{t}'"
+    "#{src}\t#{t}"
+  else
+    puts "end of input, sending 'fi'"
+    "fi"
+  end
+end
+
+# Restores the complete input so that /next starts from the first segment.
+get '/reset' do
+  cross_origin
+  input = Array.new input_
+  "done"
+end
+
-- 
cgit v1.2.3