summaryrefslogtreecommitdiff
path: root/training/dtrain
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2013-03-15 11:31:18 +0100
committerPatrick Simianer <p@simianer.de>2013-03-15 11:31:18 +0100
commitcf67d34738e1487f75739dc1e027b1864a06513b (patch)
tree2080f05433457f84280665e8bec27a2ec9dc259a /training/dtrain
parent2b4b3adc764085bccc6ddbde96b8cc7ba4287a9f (diff)
overhauled ruby scripts and examples
Diffstat (limited to 'training/dtrain')
-rw-r--r--training/dtrain/dtrain.cc2
-rw-r--r--training/dtrain/examples/parallelized/README (renamed from training/dtrain/test/parallelize/README)0
-rw-r--r--training/dtrain/examples/parallelized/cdec.ini (renamed from training/dtrain/test/parallelize/cdec.ini)0
-rw-r--r--training/dtrain/examples/parallelized/dtrain.ini (renamed from training/dtrain/test/parallelize/dtrain.ini)5
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.0.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.0.gz)bin8318 -> 8318 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.1.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.1.gz)bin358560 -> 358560 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.2.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.2.gz)bin1014466 -> 1014466 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.3.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.3.gz)bin391811 -> 391811 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.4.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.4.gz)bin149590 -> 149590 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.5.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.5.gz)bin537024 -> 537024 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.6.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.6.gz)bin291286 -> 291286 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.7.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.7.gz)bin1038140 -> 1038140 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.8.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.8.gz)bin419889 -> 419889 bytes
-rw-r--r--training/dtrain/examples/parallelized/grammar/grammar.out.9.gz (renamed from training/dtrain/test/parallelize/g/grammar.out.9.gz)bin409140 -> 409140 bytes
-rw-r--r--training/dtrain/examples/parallelized/in10
-rw-r--r--training/dtrain/examples/parallelized/refs (renamed from training/dtrain/test/parallelize/refs)0
-rw-r--r--training/dtrain/examples/parallelized/work/out.0.061
-rw-r--r--training/dtrain/examples/parallelized/work/out.0.162
-rw-r--r--training/dtrain/examples/parallelized/work/out.1.061
-rw-r--r--training/dtrain/examples/parallelized/work/out.1.162
-rw-r--r--training/dtrain/examples/parallelized/work/shard.0.0.in5
-rw-r--r--training/dtrain/examples/parallelized/work/shard.0.0.refs5
-rw-r--r--training/dtrain/examples/parallelized/work/shard.1.0.in5
-rw-r--r--training/dtrain/examples/parallelized/work/shard.1.0.refs5
-rw-r--r--training/dtrain/examples/parallelized/work/weights.012
-rw-r--r--training/dtrain/examples/parallelized/work/weights.0.012
-rw-r--r--training/dtrain/examples/parallelized/work/weights.0.112
-rw-r--r--training/dtrain/examples/parallelized/work/weights.112
-rw-r--r--training/dtrain/examples/parallelized/work/weights.1.011
-rw-r--r--training/dtrain/examples/parallelized/work/weights.1.112
-rw-r--r--training/dtrain/examples/standard/README2
-rw-r--r--training/dtrain/examples/standard/cdec.ini (renamed from training/dtrain/test/example/cdec.ini)1
-rw-r--r--training/dtrain/examples/standard/dtrain.ini24
-rw-r--r--training/dtrain/examples/standard/expected-output1206
-rw-r--r--training/dtrain/examples/standard/nc-wmt11.de.gzbin0 -> 58324 bytes
-rw-r--r--training/dtrain/examples/standard/nc-wmt11.en.gzbin0 -> 49600 bytes
-rw-r--r--training/dtrain/examples/standard/nc-wmt11.en.srilm.gzbin0 -> 16017291 bytes
-rw-r--r--training/dtrain/examples/standard/nc-wmt11.grammar.gzbin0 -> 1399924 bytes
-rw-r--r--training/dtrain/examples/toy/cdec.ini (renamed from training/dtrain/test/toy/cdec.ini)1
-rw-r--r--training/dtrain/examples/toy/dtrain.ini (renamed from training/dtrain/test/toy/dtrain.ini)5
-rw-r--r--training/dtrain/examples/toy/expected-output77
-rw-r--r--training/dtrain/examples/toy/grammar.gzbin0 -> 219 bytes
-rw-r--r--training/dtrain/examples/toy/src2
-rw-r--r--training/dtrain/examples/toy/tgt2
-rwxr-xr-xtraining/dtrain/lplp.rb18
-rwxr-xr-xtraining/dtrain/parallelize.rb26
-rw-r--r--training/dtrain/test/example/README8
-rw-r--r--training/dtrain/test/example/dtrain.ini22
-rw-r--r--training/dtrain/test/example/expected-output89
-rw-r--r--training/dtrain/test/parallelize/in10
-rw-r--r--training/dtrain/test/toy/input2
51 files changed, 1684 insertions, 165 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index dfb5b351..fcb46db2 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -254,8 +254,6 @@ main(int argc, char** argv)
time_t start, end;
time(&start);
- igzstream grammar_buf_in;
- if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str());
score_t score_sum = 0.;
score_t model_sum(0);
unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0;
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/examples/parallelized/README
index 89715105..89715105 100644
--- a/training/dtrain/test/parallelize/README
+++ b/training/dtrain/examples/parallelized/README
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini
index e43ba1c4..e43ba1c4 100644
--- a/training/dtrain/test/parallelize/cdec.ini
+++ b/training/dtrain/examples/parallelized/cdec.ini
diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini
index 03f9d240..f19ef891 100644
--- a/training/dtrain/test/parallelize/dtrain.ini
+++ b/training/dtrain/examples/parallelized/dtrain.ini
@@ -2,7 +2,7 @@ k=100
N=4
learning_rate=0.0001
gamma=0
-loss_margin=0
+loss_margin=1.0
epochs=1
scorer=stupid_bleu
sample_from=kbest
@@ -11,5 +11,6 @@ pair_sampling=XYX
hi_lo=0.1
select_weights=last
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
+# newer versions of the grammar extractor use different feature names:
+#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
decoder_config=cdec.ini
diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz
index 1e28a24b..1e28a24b 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.0.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz
index 372f5675..372f5675 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.1.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz
index 145d0dc0..145d0dc0 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.2.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz
index 105593ff..105593ff 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.3.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz
index 30781f48..30781f48 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.4.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz
index 834ee759..834ee759 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.5.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz
index 2e76f348..2e76f348 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.6.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz
index 3741a887..3741a887 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.7.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz
index ebf6bd0c..ebf6bd0c 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.8.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz
Binary files differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz
index c1791059..c1791059 100644
--- a/training/dtrain/test/parallelize/g/grammar.out.9.gz
+++ b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz
Binary files differ
diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in
new file mode 100644
index 00000000..51d01fe7
--- /dev/null
+++ b/training/dtrain/examples/parallelized/in
@@ -0,0 +1,10 @@
+<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
+<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
+<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
+<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
+<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
+<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
+<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
+<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/examples/parallelized/refs
index 632e27b0..632e27b0 100644
--- a/training/dtrain/test/parallelize/refs
+++ b/training/dtrain/examples/parallelized/refs
diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0
new file mode 100644
index 00000000..7a00ed0f
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.0.0
@@ -0,0 +1,61 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 3121929377
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.0.0.in'
+ refs 'work/shard.0.0.refs'
+ output 'work/weights.0.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = +0.2663
+ WordPenalty = -0.0079042
+ LanguageModel = +0.44782
+ LanguageModel_OOV = -0.0401
+ PhraseModel_0 = -0.193
+ PhraseModel_1 = +0.71321
+ PhraseModel_2 = +0.85196
+ PhraseModel_3 = -0.43986
+ PhraseModel_4 = -0.44803
+ PhraseModel_5 = -0.0538
+ PhraseModel_6 = -0.1788
+ PassThrough = -0.1477
+ ---
+ 1best avg score: 0.17521 (+0.17521)
+ 1best avg model score: 21.556 (+21.556)
+ avg # pairs: 1671.2
+ avg # rank err: 1118.6
+ avg # margin viol: 552.6
+ non0 feature count: 12
+ avg list sz: 100
+ avg f count: 11.32
+(time 0.37 min, 4.4 s/S)
+
+Writing weights file to 'work/weights.0.0' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.17521].
+This took 0.36667 min.
diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1
new file mode 100644
index 00000000..e2bd6649
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.0.1
@@ -0,0 +1,62 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 2767202922
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.0.0.in'
+ refs 'work/shard.0.0.refs'
+ output 'work/weights.0.1'
+ weights in 'work/weights.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = -0.2699
+ WordPenalty = +0.080605
+ LanguageModel = -0.026572
+ LanguageModel_OOV = -0.30025
+ PhraseModel_0 = -0.32076
+ PhraseModel_1 = +0.67451
+ PhraseModel_2 = +0.92
+ PhraseModel_3 = -0.36402
+ PhraseModel_4 = -0.592
+ PhraseModel_5 = -0.0269
+ PhraseModel_6 = -0.28755
+ PassThrough = -0.33285
+ ---
+ 1best avg score: 0.26638 (+0.26638)
+ 1best avg model score: 53.197 (+53.197)
+ avg # pairs: 2028.6
+ avg # rank err: 998.2
+ avg # margin viol: 918.8
+ non0 feature count: 12
+ avg list sz: 100
+ avg f count: 10.496
+(time 0.32 min, 3.8 s/S)
+
+Writing weights file to 'work/weights.0.1' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.26638].
+This took 0.31667 min.
diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0
new file mode 100644
index 00000000..6e790e38
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.1.0
@@ -0,0 +1,61 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 1432415010
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.1.0.in'
+ refs 'work/shard.1.0.refs'
+ output 'work/weights.1.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = -0.3815
+ WordPenalty = +0.20064
+ LanguageModel = +0.95304
+ LanguageModel_OOV = -0.264
+ PhraseModel_0 = -0.22362
+ PhraseModel_1 = +0.12254
+ PhraseModel_2 = +0.26328
+ PhraseModel_3 = +0.38018
+ PhraseModel_4 = -0.48654
+ PhraseModel_5 = +0
+ PhraseModel_6 = -0.3645
+ PassThrough = -0.2216
+ ---
+ 1best avg score: 0.10863 (+0.10863)
+ 1best avg model score: -4.9841 (-4.9841)
+ avg # pairs: 1345.4
+ avg # rank err: 822.4
+ avg # margin viol: 501
+ non0 feature count: 11
+ avg list sz: 100
+ avg f count: 11.814
+(time 0.45 min, 5.4 s/S)
+
+Writing weights file to 'work/weights.1.0' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.10863].
+This took 0.45 min.
diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1
new file mode 100644
index 00000000..0b984761
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.1.1
@@ -0,0 +1,62 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 1771918374
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.1.0.in'
+ refs 'work/shard.1.0.refs'
+ output 'work/weights.1.1'
+ weights in 'work/weights.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = -0.3178
+ WordPenalty = +0.11092
+ LanguageModel = +0.17269
+ LanguageModel_OOV = -0.13485
+ PhraseModel_0 = -0.45371
+ PhraseModel_1 = +0.38789
+ PhraseModel_2 = +0.75311
+ PhraseModel_3 = -0.38163
+ PhraseModel_4 = -0.58817
+ PhraseModel_5 = -0.0269
+ PhraseModel_6 = -0.27315
+ PassThrough = -0.16745
+ ---
+ 1best avg score: 0.13169 (+0.13169)
+ 1best avg model score: 24.226 (+24.226)
+ avg # pairs: 1951.2
+ avg # rank err: 985.4
+ avg # margin viol: 951
+ non0 feature count: 12
+ avg list sz: 100
+ avg f count: 11.224
+(time 0.42 min, 5 s/S)
+
+Writing weights file to 'work/weights.1.1' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.13169].
+This took 0.41667 min.
diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in
new file mode 100644
index 00000000..92f9c78e
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.0.0.in
@@ -0,0 +1,5 @@
+<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
+<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
+<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
+<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs
new file mode 100644
index 00000000..bef68fee
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs
@@ -0,0 +1,5 @@
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in
new file mode 100644
index 00000000..b7695ce7
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.1.0.in
@@ -0,0 +1,5 @@
+<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
+<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
+<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
+<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
+<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs
new file mode 100644
index 00000000..6076f6d5
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs
@@ -0,0 +1,5 @@
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0
new file mode 100644
index 00000000..ddd595a8
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0
@@ -0,0 +1,12 @@
+LanguageModel 0.7004298992212881
+PhraseModel_2 0.5576194336478857
+PhraseModel_1 0.41787318415343155
+PhraseModel_4 -0.46728502545635164
+PhraseModel_3 -0.029839521598455515
+Glue -0.05760000000000068
+PhraseModel_6 -0.2716499999999978
+PhraseModel_0 -0.20831031065605327
+LanguageModel_OOV -0.15205000000000077
+PassThrough -0.1846500000000006
+WordPenalty 0.09636994553433414
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0
new file mode 100644
index 00000000..c9370b18
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0.0
@@ -0,0 +1,12 @@
+WordPenalty -0.0079041595706392243
+LanguageModel 0.44781580828279532
+LanguageModel_OOV -0.04010000000000042
+Glue 0.26629999999999948
+PhraseModel_0 -0.19299677809125185
+PhraseModel_1 0.71321026861732773
+PhraseModel_2 0.85195540993310537
+PhraseModel_3 -0.43986310822842656
+PhraseModel_4 -0.44802855630415955
+PhraseModel_5 -0.053800000000000514
+PhraseModel_6 -0.17879999999999835
+PassThrough -0.14770000000000036
diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1
new file mode 100644
index 00000000..8fad3de8
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0.1
@@ -0,0 +1,12 @@
+WordPenalty 0.080605055841244472
+LanguageModel -0.026571720531022844
+LanguageModel_OOV -0.30024999999999141
+Glue -0.26989999999999842
+PhraseModel_2 0.92000295209089566
+PhraseModel_1 0.67450748692470841
+PhraseModel_4 -0.5920000014976784
+PhraseModel_3 -0.36402437203127397
+PhraseModel_6 -0.28754999999999603
+PhraseModel_0 -0.32076244202907672
+PassThrough -0.33284999999999004
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1
new file mode 100644
index 00000000..03058a16
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1
@@ -0,0 +1,12 @@
+PhraseModel_2 0.8365578543552836
+PhraseModel_4 -0.5900840266009169
+PhraseModel_1 0.5312000609786991
+PhraseModel_0 -0.3872342271319619
+PhraseModel_3 -0.3728279676912084
+Glue -0.2938500000000036
+PhraseModel_6 -0.2803499999999967
+PassThrough -0.25014999999999626
+LanguageModel_OOV -0.21754999999999702
+LanguageModel 0.07306061161169894
+WordPenalty 0.09576193325966899
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0
new file mode 100644
index 00000000..6a6a65c1
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1.0
@@ -0,0 +1,11 @@
+WordPenalty 0.20064405063930751
+LanguageModel 0.9530439901597807
+LanguageModel_OOV -0.26400000000000112
+Glue -0.38150000000000084
+PhraseModel_0 -0.22362384322085468
+PhraseModel_1 0.12253609968953538
+PhraseModel_2 0.26328345736266612
+PhraseModel_3 0.38018406503151553
+PhraseModel_4 -0.48654149460854373
+PhraseModel_6 -0.36449999999999722
+PassThrough -0.22160000000000085
diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1
new file mode 100644
index 00000000..f56ea4a2
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1.1
@@ -0,0 +1,12 @@
+WordPenalty 0.1109188106780935
+LanguageModel 0.17269294375442074
+LanguageModel_OOV -0.13485000000000266
+Glue -0.3178000000000088
+PhraseModel_2 0.75311275661967159
+PhraseModel_1 0.38789263503268989
+PhraseModel_4 -0.58816805170415531
+PhraseModel_3 -0.38163156335114284
+PhraseModel_6 -0.27314999999999739
+PhraseModel_0 -0.45370601223484697
+PassThrough -0.16745000000000249
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README
new file mode 100644
index 00000000..ce37d31a
--- /dev/null
+++ b/training/dtrain/examples/standard/README
@@ -0,0 +1,2 @@
+Call `dtrain` from this folder with `../../dtrain -c dtrain.ini`.
+
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/examples/standard/cdec.ini
index 0215416d..e1edc68d 100644
--- a/training/dtrain/test/example/cdec.ini
+++ b/training/dtrain/examples/standard/cdec.ini
@@ -3,6 +3,7 @@ add_pass_through_rules=true
scfg_max_span_limit=15
intersection_strategy=cube_pruning
cubepruning_pop_limit=200
+grammar=nc-wmt11.grammar.gz
feature_function=WordPenalty
feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
# all currently working feature functions for translation:
diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini
new file mode 100644
index 00000000..a05e9c29
--- /dev/null
+++ b/training/dtrain/examples/standard/dtrain.ini
@@ -0,0 +1,24 @@
+input=./nc-wmt11.de.gz
+refs=./nc-wmt11.en.gz
+output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
+select_weights=avg # output average (over epochs) weight vector
+decoder_config=./cdec.ini # config for cdec
+# weights for these features will be printed on each iteration
+print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV
+# newer versions of the grammar extractor use different feature names:
+#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+stop_after=10 # stop epoch after 10 inputs
+
+# interesting stuff
+epochs=2 # run over input 2 times
+k=100 # use 100best lists
+N=4 # optimize (approx) BLEU4
+scorer=stupid_bleu # use 'stupid' BLEU+1
+learning_rate=1.0 # learning rate; irrelevant if gamma=0 (perceptron)
+gamma=0 # SVM regularization strength (0 = no regularization, i.e. perceptron)
+sample_from=kbest # use kbest lists (as opposed to forest)
+filter=uniq # only unique entries in kbest (surface form)
+pair_sampling=XYX #
+hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
+pair_threshold=0 # minimum distance in BLEU (here: > 0)
+loss_margin=0
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
new file mode 100644
index 00000000..8d72f4c3
--- /dev/null
+++ b/training/dtrain/examples/standard/expected-output
@@ -0,0 +1,1206 @@
+ cdec cfg './cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ./nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+ Example feature: Shape_S00000_T00000
+Seeding random number sequence to 1511823303
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 2
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 1
+ gamma 0
+ loss margin 0
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'avg'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg './cdec.ini'
+ input './nc-wmt11.de.gz'
+ refs './nc-wmt11.en.gz'
+ output '-'
+ stop_after 10
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ . 10
+Stopping after 10 input sentences.
+WEIGHTS
+ EgivenFCoherent = +0
+ SampleCountF = +0
+ CountEF = +0
+ MaxLexFgivenE = +0
+ MaxLexEgivenF = +0
+ IsSingletonF = +0
+ IsSingletonFE = +0
+ Glue = -576
+ WordPenalty = +417.79
+ PassThrough = -1455
+ LanguageModel = +5117.5
+ LanguageModel_OOV = -1307
+ ---
+ 1best avg score: 0.27697 (+0.27697)
+ 1best avg model score: -47918 (-47918)
+ avg # pairs: 1129.8
+ avg # rank err: 581.9
+ avg # margin viol: 0
+ non0 feature count: 703
+ avg list sz: 90.9
+ avg f count: 100.09
+(time 0.33 min, 2 s/S)
+
+Iteration #2 of 2.
+ . 10
+WEIGHTS
+ EgivenFCoherent = +0
+ SampleCountF = +0
+ CountEF = +0
+ MaxLexFgivenE = +0
+ MaxLexEgivenF = +0
+ IsSingletonF = +0
+ IsSingletonFE = +0
+ Glue = -622
+ WordPenalty = +898.56
+ PassThrough = -2578
+ LanguageModel = +8066.2
+ LanguageModel_OOV = -2590
+ ---
+ 1best avg score: 0.37119 (+0.094226)
+ 1best avg model score: -1.3174e+05 (-83822)
+ avg # pairs: 1214.9
+ avg # rank err: 584.1
+ avg # margin viol: 0
+ non0 feature count: 1115
+ avg list sz: 91.3
+ avg f count: 90.755
+(time 0.27 min, 1.6 s/S)
+
+Writing weights file to '-' ...
+R:X:NX_sein:N1_its 61.5
+WordPenalty 658.17328732437022
+LanguageModel 6591.8747593425214
+LanguageModel_OOV -1948.5
+R:X:das_NX:this_N1 12
+R:X:NX_sein_NX:N1_from_ever_being_able_to_N2 30
+R:X:NX_bemühen:N1_effort 2.5
+RBS:X_bemühen 2.5
+R:X:sich:sich -17.5
+RBT:<r>_sich -17.5
+RBT:sich_</r> -17.5
+RBS:sich_X 17.5
+RBS:<r>_als 147
+RBS:als_</r> -59
+Shape_S10000_T10000 -1711.5
+RBT:<r>_when 84
+R:X:zum_NX:as_N1 -134
+RBS:<r>_zum -30
+R:X:als_NX:as_N1 63
+R:X:zum_NX:'s_N1 33
+R:X:zum_NX:the_N1 24
+RBS:X_sich -12
+R:X:zum_NX:to_N1 -36
+R:X:zum_NX:with_the_N1 83
+R:X:NX_zum:N1_the -66
+R:X:NX_zum:N1_to 66
+R:X:als_NX:when_N1 84
+RBS:als_das 59
+RBS:X_das -104
+R:X:NX_das:N1_a 28.5
+R:X:er_sich_NX:he_N1 86.5
+RBS:er_sich 29.5
+R:X:NX_das:N1_it -6
+R:X:er_sich_NX:him_N1 -57
+RBT:<r>_declared -488
+R:X:NX_das:N1_that -5
+RBT:declared_</r> -8
+R:X:NX_das:N1_the -57
+R:X:NX_das:N1_this -17
+R:X:NX_.:N1_. -323
+RBS:X_. 134
+R:X:NX_.:N1_debate_. 6.5
+R:X:NX_.:N1_disruptions_. -14.5
+R:X:NX_.:N1_established_. 7.5
+R:X:NX_.:N1_heading_. 17
+R:X:NX_.:N1_on_. 94
+R:X:NX_.:N1_pace_. 51.5
+R:X:NX_das_NX:N1_a_growing_N2 -45
+R:X:general:general -23.5
+R:X:NX_.:N1_politics_. 84
+R:X:NX_das_NX:N1_a_N2 -0.5
+R:X:NX_.:N1_power_. -99.5
+RBS:general_</r> -23.5
+R:X:NX_.:N1_-_range_missiles_. -28.5
+Shape_S11000_T11000 40
+RBT:general_</r> -23.5
+RBT:<r>_. -645
+R:X:betrat:entered -91
+R:X:NX_.:N1_war_. 68.5
+RBS:<r>_betrat 23.5
+Shape_S11000_T01100 475.5
+RBT:<r>_entered -91
+RBT:entered_</r> -91
+R:X:NX_das_NX:N1_the_N2 -2
+R:X:betrat:betrat 114.5
+RBT:<r>_betrat 114.5
+RBT:betrat_</r> 114.5
+R:X:12:12 79
+R:X:maßnahmen:action 24
+R:X:.:. -566
+RBS:12_</r> 79
+RBS:<r>_maßnahmen -44.5
+RBS:<r>_. -645
+RBT:._</r> -566
+RBT:<r>_action 24
+RBT:12_</r> 79
+RBT:action_</r> 24
+R:X:maßnahmen:actions -13
+RBT:<r>_actions -13
+RBT:actions_</r> -13
+R:X:12_NX:12_N1 -79
+RBT:declared_a -428
+RBS:12_X -79
+RBT:a_state -428
+RBT:state_of -428
+R:X:maßnahmen:maßnahmen -55.5
+R:X:internationale_NX:global_N1 -270
+RBS:X_am 316.5
+RBT:<r>_maßnahmen -55.5
+RBS:am_</r> 267.5
+RBT:maßnahmen_</r> -55.5
+RBS:<r>_den 883
+R:X:internationale_NX:international_N1 270
+RBS:den_X -286.5
+R:X:NX_am:N1_of 267.5
+R:X:NX_als:N1_a -273.5
+RBS:am_X -281
+R:X:den_NX:'s_N1 -31
+R:X:NX_am_NX:N1_of_N2 -30
+R:X:NX_am_NX:N1_on_N2 79
+R:X:NX_als:N1_'s 273.5
+R:X:NX_betrat:N1_entered -23.5
+R:X:ins_NX:into_the_N1 -32.5
+RBS:X_betrat -23.5
+RBT:into_the -55
+R:X:ins_NX:into_N1 32.5
+RBT:<r>_their 303
+R:X:general_NX:general_N1 23.5
+RBS:general_X 23.5
+RBS:<r>_am -316.5
+R:X:den_NX:the_N1 89
+R:X:den_NX_.:the_N1_. 86.5
+R:X:NX_und:and_N1 -216
+RBS:X_und -203.5
+RBS:und_</r> 522.5
+RBT:<r>_and 438.5
+R:X:am_NX:at_N1 23
+R:X:NX_als_das:N1_than_the 59
+R:X:NX_und:N1_- -114
+R:X:NX_und:N1_, 114
+R:X:am_NX:of_N1 -4
+R:X:am_NX:on_N1 -158.5
+R:X:am_NX:the_N1 -190
+RBS:<r>_seine -16.5
+RBS:seine_</r> 39
+R:X:oktober:october -79.5
+R:X:seine:his -5.5
+RBS:<r>_oktober -79.5
+R:X:seine:its 50
+RBT:<r>_october -79.5
+RBT:october_</r> -79.5
+R:X:seine_NX:a_N1 7.5
+RBS:seine_X -39
+R:X:NX_und_NX:and_N1_N2 -22
+RBS:und_X 160.5
+R:X:seine_NX:his_N1 -97
+R:X:seine_NX:its_N1 102.5
+R:X:NX_und_NX:N1_,_and_N2 -4
+R:X:NX_maßnahmen:N1_actions 44.5
+RBS:X_maßnahmen 44.5
+R:X:seine_NX_als:his_N1_than 5.5
+R:X:seine_NX_als:its_N1_as -64.5
+R:X:NX_und_NX:N1_,_N2 -7
+Shape_S01100_T11000 -312.5
+RBS:und_den -822.5
+Shape_S01100_T01100 -537.5
+Shape_S01100_T11100 15
+R:X:NX_seine:'s_N1 -5.5
+RBS:X_seine 16.5
+RBS:X_den -38
+R:X:amerika_NX_sich_NX:america_N1_N2 -12
+R:X:NX_seine_NX:'s_N1_N2 22
+R:X:auf_NX_den_NX:to_N1_the_N2 -23
+R:X:auf_NX_den_NX:to_N1_N2 -23
+RBS:<r>_unterstützen -716
+RBS:unterstützen_</r> -1
+Shape_S11100_T11000 783.5
+Shape_S11100_T01100 -716
+Shape_S11100_T11100 488
+R:X:unterstützen:unterstützen -1
+RBT:<r>_unterstützen -1
+RBT:unterstützen_</r> -1
+R:X:unterstützen_NX:support_N1 -715
+RBS:unterstützen_X -715
+RBT:<r>_will -6
+RBS:X_unterstützen 716
+RBT:<r>_if 35
+R:X:NX_den_NX_.:N1_N2_. 41
+R:X:verfassung:constitution 15
+RBS:<r>_verfassung -43
+RBT:<r>_constitution 15
+RBT:constitution_</r> 15
+R:X:verfassung:constitutional 9.5
+RBT:<r>_constitutional 9.5
+RBS:unterstützen_. 716
+RBT:constitutional_</r> 9.5
+R:X:NX_unterstützen_.:N1_. 716
+R:X:verfassung:verfassung -67.5
+R:X:eine_NX:an_N1 162
+RBT:<r>_verfassung -67.5
+RBT:verfassung_</r> -67.5
+R:X:und:, -21.5
+R:X:,_NX_zu_NX:to_N2_N1 -153
+RBS:<r>_und -389.5
+R:X:und:and -35
+RBS:angeführten_</r> -716
+RBT:and_</r> -35
+RBT:<r>_as 63
+RBS:versucht_</r> 68
+R:X:und:with -3
+R:X:eine_NX:is_N1 -162
+RBS:angeführten_X 716
+R:X:und:und 91
+RBT:<r>_und 91
+RBT:und_</r> 91
+R:X:versucht:tried 68
+RBT:tried_</r> 68
+RBS:versucht_X -68
+R:X:versucht_NX:tried_N1 -68
+R:X:und_NX:and_N1 250
+R:X:und_NX:with_N1 -18
+R:X:und_NX:,_N1 -7
+R:X:und_NX:N1_and -12
+R:X:und_den_NX:and_N1 -716
+R:X:er:he 17
+R:X:NX_eine:N1_is -7
+RBS:<r>_er -47.5
+RBS:er_</r> 54
+RBT:<r>_he 485.5
+RBT:he_</r> 17
+RBT:<r>_him -1
+R:X:und_NX_.:,_N1_. -3
+R:X:er:his 91
+R:X:und_den_NX_.:and_the_N1_. 88
+R:X:NX_eine:N1_will 7
+R:X:er:it 3
+R:X:und_den_NX_.:and_N1_. -216.5
+R:X:er:er -196
+RBT:<r>_er -196
+RBT:er_</r> -196
+RBS:er_X 8
+R:X:er_NX:he_N1 399
+R:X:er_NX:it_N1 -379
+Shape_S01010_T01010 -599
+RBS:pakistanischen_</r> 43
+R:X:NX_versucht:N1_tried 196
+RBT:<r>_pakistan -43
+RBT:<r>_pakistani 2
+R:X:er_NX_,_NX:he_N1_N2 -12
+R:X:NX_hat_er:N1_,_he_has 196
+RBS:hat_er 196
+R:X:NX_er:he_N1 -17
+RBS:X_er -148.5
+RBS:pakistanischen_X -43
+R:X:NX_er:it_N1 -7
+RBS:X_verfassung 43
+R:X:NX_verfassung:N1_'s_constitution 43
+R:X:NX_hat_NX_versucht:N1_N2_has_tried -190
+R:X:NX_hat_NX_versucht:N1_,_N2_has_tried -6
+RBS:der_pakistanischen 43
+RBS:X_pakistanischen -43
+RBS:<r>_aber 46
+RBS:,_als -147
+RBT:<r>_but -321
+R:X:aber_NX:but_N1 46
+R:X:von_NX_angeführten:N1_-_led -716
+R:X:von_NX_angeführten_NX:N1_-_led_N2 716
+RBS:,_aber -114
+RBS:X_aber 68
+R:X:,_als_NX:,_as_N1 -40
+R:X:NX_aber_NX_,:N1_N2_to 68
+R:X:NX_pakistanischen_NX_.:pakistan_N1_N2_. -43
+R:X:NX_,_aber_NX:N1_,_N2 -114
+RBS:<r>_rahmen 43
+RBS:rahmen_</r> 43
+R:X:rahmen:within 20
+R:X:rahmen:rahmen 23
+RBT:<r>_rahmen 23
+RBT:rahmen_</r> 23
+Shape_S01110_T11010 35.5
+R:X:NX_der_pakistanischen:N1_pakistan 43
+Shape_S01110_T01110 -1195
+Shape_S01110_T11110 -6.5
+R:X:NX_,_NX_er:N1_N2_he -33
+RBS:geben_X -577.5
+RBS:<r>_gestalten 196
+Shape_S01110_T01011 278
+RBS:gestalten_</r> 196
+RBS:geben_und 577.5
+R:X:gestalten:more 221
+Shape_S01110_T01111 -181.5
+RBT:<r>_more 221
+RBT:more_</r> 221
+R:X:gestalten:gestalten -25
+RBT:<r>_gestalten -25
+RBT:gestalten_</r> -25
+R:X:effektiver:effectively -151
+RBS:<r>_effektiver 54
+RBS:effektiver_</r> -221
+RBT:<r>_effectively -151
+RBT:effectively_</r> -151
+R:X:effektiver:effektiver -99
+RBT:<r>_effektiver -99
+RBT:effektiver_</r> -99
+Shape_S11110_T11010 -1130
+RBS:zu_geben -107.5
+R:X:effektiver_zu_NX:N1_effectively 304
+RBS:effektiver_zu 221
+RBS:X_geben 107.5
+Shape_S11110_T01110 621
+Shape_S11110_T11110 -75
+RBS:X_gestalten -196
+R:X:NX_gestalten_.:N1_. -196
+RBS:gestalten_. -196
+R:X:terror:terror 672
+RBS:<r>_terror -16
+RBS:terror_</r> 640
+R:X:den:- -4
+RBT:<r>_terror 136
+RBT:terror_</r> 646
+RBS:den_</r> 42.5
+R:X:den:for -11.5
+R:X:terror:terrorism -54
+RBT:<r>_terrorism -54
+Shape_S11110_T11011 -4.5
+RBT:terrorism_</r> -54
+R:X:terror_NX:terror_N1 -634
+R:X:den:of -17
+RBS:terror_X -640
+R:X:den:'s 32.5
+Shape_S11110_T01111 -1.5
+R:X:NX_effektiver:N1_more_effectively 29
+RBS:X_effektiver -54
+R:X:den:the 68
+R:X:NX_geben_und:N1_and 107.5
+R:X:NX_effektiver_zu_NX:N1_N2_effectively -83
+R:X:den:to -33
+RBS:1999_</r> -302.5
+R:X:,_NX_zu_geben_NX:to_N1_N2 -577.5
+R:X:den:with -10
+RBS:X_terror -4.5
+R:X:,_NX_zu_geben_und:to_N1_and 470
+R:X:NX_1999:N1_1999 -302.5
+R:X:NX_1999_NX:N2_N1_1999 302.5
+RBS:1999_X 302.5
+R:X:den_NX_zu:to_N1 783.5
+R:X:NX_rahmen_der:N1_the -43
+RBS:X_rahmen -43
+RBS:rahmen_der -43
+RBS:gegen_</r> 22.5
+R:X:gegen:against -2
+RBT:<r>_against -2
+RBT:against_</r> -2
+R:X:._NX:._N1 -79
+RBS:._X -79.5
+RBS:gegen_den -22.5
+R:X:NX_._oktober:october_N1 79.5
+RBS:._oktober 79.5
+R:X:am_NX_._NX:the_N2_N1 -0.5
+R:X:gegen_den_NX:on_N1 2
+RBS:den_terror 20.5
+RBT:on_terror -26
+R:X:NX_den_terror:the_N1_terror 29
+R:X:den_NX_den_NX:the_N1_N2 -110.5
+R:X:den_NX_den_NX:N2_the_N1 -95
+RBT:<unk>_the -1.5
+R:X:krieg:war -4.5
+RBS:<r>_krieg -22
+R:X:musharraf:musharraf 43
+RBS:krieg_</r> -4.5
+RBT:<r>_war -22
+RBS:<r>_musharraf 66.5
+RBS:musharraf_</r> -23.5
+RBT:war_</r> -4.5
+R:X:musharraf_NX:musharraf_imposed_N1 23.5
+RBS:musharraf_X 23.5
+RBT:musharraf_imposed 23.5
+RBS:krieg_gegen 4.5
+R:X:musharraf_NX:musharraf_N1 107
+R:X:krieg_gegen:war_on 24.5
+RBT:war_on -17.5
+RBS:X_gegen -4.5
+R:X:musharraf_NX_,_als_NX:musharraf_N1_as_N2 -20
+R:X:musharraf_NX_,_als_NX:musharraf_N1_N2 -87
+R:X:krieg_gegen_den_NX:war_on_N1 -16
+R:X:krieg_gegen_den_terror:war_on_terror -26
+R:X:pervez:pervez 22
+RBS:<r>_pervez 22
+RBS:pervez_</r> 57.5
+RBS:X_krieg 22
+RBT:<r>_pervez 22
+RBT:pervez_</r> 22
+RBS:pervez_musharraf -57.5
+RBS:X_musharraf -9
+R:X:NX_musharraf:N1_musharraf -9
+R:X:den_NX_gegen_den:the_N1_on -4.5
+R:X:den_NX_den_terror:the_N1_terror -3
+R:X:NX_krieg_gegen_den_terror:N1_war_on_terror 22
+R:X:den_NX_den_terror_NX:N2_the_N1_terror -1.5
+RBT:<r>_project 91
+RBS:hat_</r> 2
+RBS:X_- 14
+R:X:NX_-:,_N1 48.5
+R:X:NX_-:N1_months_of 32
+R:X:NX_-:N1_relief_and 64
+R:X:NX_-:N1_'s -144.5
+RBS:hat_X -198
+R:X:und_NX_terror_NX:and_N2_N1_terror -4.5
+RBT:and_<unk> -4.5
+R:X:sorgen:bring -19
+RBS:X_pervez -22
+RBT:<r>_bring -19
+RBT:bring_</r> -19
+R:X:sorgen:ensure 19
+RBT:<r>_ensure 19
+RBT:ensure_</r> 19
+R:X:NX_-_NX:N1_N2_security -4
+R:X:NX_projekt_NX:N2_N1_project -156
+R:X:NX_-_NX_.:N1_N2_. 18
+R:X:NX_projekt_NX_.:N2_N1_project_. 156
+RBS:<r>_- -14
+RBT:to_ensure 0.5
+R:X:NX_hat:has_N1 -5
+R:X:NX_hat:N1_, 3
+R:X:NX_hat:,_N1 21.5
+R:X:NX_hat:N1_has -17
+R:X:NX_hat:N1_is -0.5
+R:X:-_NX:of_N1 -26
+R:X:-_NX:'s_N1 -58
+R:X:NX_hat_NX:N1_,_N2 -73
+R:X:NX_hat_NX:N1_N2_has 28
+R:X:-_NX:-_N1 122
+R:X:NX_hat_NX:N1_,_N2_has 21
+R:X:-_NX:--_N1 -21
+R:X:-_NX:,_N1 -31
+R:X:stabilität:stability -118
+RBS:<r>_stabilität -129
+RBT:<r>_stability -118
+RBT:stability_</r> -118
+R:X:stabilität:stabilität -11
+RBT:<r>_stabilität -11
+RBT:stabilität_</r> -11
+RBT:<r>_country 253
+RBS:<r>_für 101
+RBS:für_</r> 129
+RBS:X_ihres -16
+R:X:NX_ihres_NX:N1_of_their_N2 -16
+R:X:für:that 129
+RBT:<r>_political -16
+RBS:für_X -129
+R:X:,_NX_und_NX:,_N1_N2 -2
+R:X:für_NX:to_N1 -28
+R:X:NX_stabilität:N1_stability 129
+RBS:X_stabilität 129
+RBS:X_für 22
+RBT:<unk>_with -109
+RBS:,_für -123
+R:X:,_für_NX:,_N1 15.5
+R:X:,_NX_den_NX_zu:to_N2_N1 69
+R:X:NX_für_NX_.:N1_N2_. 22
+RBS:<r>_ihres 16
+R:X:ihres_NX:its_N1 -50
+R:X:ihres_NX:their_N1 66
+R:X:NX_zu_verkaufen_NX:sell_N1_N2 140.5
+RBS:verkaufen_X 140.5
+RBS:<r>_würde -204
+RBS:würde_</r> -117
+R:X:würde:would -204
+RBS:würde_X 126
+R:X:in_NX_hat_NX:in_N1_N2 22
+R:X:NX_dem_NX_pervez:N1_N2_pervez 35.5
+RBS:<r>_halten 284
+RBS:halten_</r> 204
+R:X:NX_dem_NX_pervez_musharraf:N1_N2_pervez_musharraf -57.5
+Shape_S01111_T01011 560.5
+Shape_S01111_T11011 -20.5
+Shape_S01111_T01111 -5
+RBT:<r>_maintain 30
+R:X:halten:halten 284
+RBT:<r>_halten 284
+RBT:halten_</r> 284
+RBS:halten_X -204
+R:X:NX_würde:if_N1 35
+RBS:X_würde 204
+R:X:NX_würde:will_N1 -6
+Shape_S11111_T11010 69
+R:X:NX_würde:would_face_a_N1 -9.5
+RBT:would_face -18.5
+RBT:face_a -18.5
+Shape_S11111_T11110 -57
+R:X:NX_würde:would_N1 78
+R:X:NX_würde:N1_will -10.5
+R:X:NX_würde_NX:would_N1_N2 126
+R:X:NX_würde_.:would_face_a_N1_. -9
+RBS:würde_. -9
+PhraseModel_0 -2973.8953021225416
+R:X:vielleicht:may -177
+PhraseModel_1 -4012.0052074229625
+PhraseModel_2 -1203.5725821427027
+RBS:vielleicht_</r> -284
+PhraseModel_3 2747.8420998127522
+PhraseModel_4 -3205.3163436680484
+PhraseModel_5 720.5
+PhraseModel_6 275
+R:X:vielleicht:vielleicht -107
+RBT:<r>_vielleicht -107
+RBT:vielleicht_</r> -107
+R:X:vielleicht_NX:perhaps_N1 284
+RBS:vielleicht_X 284
+R:X:NX_halten:maintain_the_N1 -29
+RBS:X_halten -284
+RBT:maintain_the -174
+R:X:NX_halten:N1_hold -51
+R:X:NX_halten_NX:N2_maintain_the_N1 -204
+RBT:<unk>_maintain -204
+RBS:<r>_versprechen 30
+RBS:versprechen_</r> -75
+RBT:<r>_commitment 107
+R:X:versprechen_NX:commitment_N1 30
+RBS:versprechen_X 75
+R:X:NX_versprechen:N1_commitment -75
+RBS:X_versprechen -30
+R:X:NX_,_für_NX:N1_,_N2 -138.5
+R:X:NX_versprechen_NX:N1_commitment_N2 45
+RBS:<r>_dass -451
+RBS:dass_</r> -91.5
+R:X:dass_NX:that_N1 -451
+RBS:dass_X 91.5
+R:X:NX_er_sein:N1_to_make_up_for_his -91.5
+RBS:er_sein -91.5
+R:X:seine_NX_und:a_N1_, -15
+R:X:NX_,_NX_und:N1_N2_, 129
+RBS:,_dass 851.5
+R:X:NX_,_dass:N1_keep -27
+R:X:NX_,_dass:N1_said_that -0.5
+R:X:NX_,_dass:N1_to_let -9.5
+R:X:NX_dass:that_N1 -8.5
+RBS:X_dass -400.5
+R:X:NX_dass:N1_let -51.5
+R:X:NX_dass:N1_see -243.5
+R:X:NX_dass:N1_thought -97
+R:X:NX_,_dass_NX:N1_that_N2 134
+Glue -599
+PassThrough -2016.5
+R:X:musharrafs:his 2
+RBS:musharrafs_</r> -29
+R:X:NX_und_den:N1_and_the 22
+RBT:<r>_his 250.5
+RBT:his_</r> 160.5
+R:X:musharrafs:musharraf -1.5
+RBT:<r>_musharraf 135.5
+RBT:musharraf_</r> 41.5
+R:X:NX_,_dass_NX_.:N1_N2_. 91.5
+R:X:musharrafs:musharrafs -29.5
+RBT:<r>_musharrafs -29.5
+RBT:musharrafs_</r> -29.5
+RBS:sie_X 346
+RBS:<r>_X -1369.5
+R:X:dies:so -74.5
+RBS:X_</r> -1743
+RBS:dies_</r> -348
+R:X:dies:so_,_this 47
+RBT:so_, 47
+R:X:sie_NX:it_N1 22
+RBT:,_this 47
+R:X:dies:that -256.5
+R:X:NX_?:N1_? -134.5
+R:X:dies:these -5.5
+RBS:X_? -235
+RBT:<r>_these -5.5
+RBT:these_</r> -5.5
+R:X:NX_?:N1_consulting_? -100.5
+R:X:dies:this -58.5
+R:X:letzter_NX:last_N1 -14
+RBS:<r>_letzter -20
+RBS:letzter_X 19.5
+RBT:<r>_last -2
+R:X:letzter:last 7
+RBS:letzter_</r> -19.5
+R:X:sein:be 1.5
+RBT:last_</r> 7
+R:X:letzter:late 11.5
+RBT:<r>_they -6
+RBS:sein_</r> 68
+RBT:<r>_late 11.5
+R:X:ist_NX:be_N1 464.5
+RBT:<r>_be -10.5
+RBT:late_</r> 11.5
+R:X:sie_NX:they_N1 -22
+RBS:<r>_ist 415.5
+RBT:be_</r> 120
+R:X:letzter:letzter -24.5
+RBS:ist_X 8
+R:X:sein:being -16
+RBT:<r>_letzter -24.5
+R:X:ist_NX:has_N1 16
+RBT:<r>_being -79
+RBT:letzter_</r> -24.5
+R:X:ist_NX:is_at_N1 6
+RBT:being_</r> -16
+R:X:musharrafs_NX:his_N1 -25
+R:X:sein:his 73
+RBS:musharrafs_X 29
+R:X:ist_NX:is_well_N1 6
+R:X:sein:its -15.5
+R:X:musharrafs_NX:musharraf_'s_N1 77.5
+R:X:sein:sein 55
+RBT:musharraf_'s 55.5
+R:X:ist_NX:is_N1 23
+RBT:<r>_sein 55
+R:X:musharrafs_NX:musharraf_N1 -23.5
+R:X:ist_NX:more_N1 -130.5
+RBT:sein_</r> 55
+R:X:NX_letzter:N1_late -26.5
+R:X:ist_NX:N1_be 176
+R:X:ziel:aim -32.5
+RBS:X_letzter 20
+R:X:ist_NX:N1_has -67
+RBS:<r>_ziel -143
+R:X:NX_letzter:N1_'s_last 13
+R:X:ist_NX:N1_is -19
+RBS:ziel_</r> -219
+R:S:NS_NX:N1_N2 -599
+R:X:ist_NX:N1_,_is 18
+RBT:<r>_aim -32.5
+RBS:<r>_S -599
+R:X:ist_NX:N1_it_is 49
+RBT:aim_</r> -32.5
+RBS:S_X -599
+R:X:ist:are -65.5
+R:X:ziel:goal 45
+R:X:NX_letzter_NX:N1_'s_last_N2 33.5
+RBS:ist_</r> -8
+RBT:<r>_goal 45
+R:X:?:? 235
+RBT:goal_</r> 45
+RBS:<r>_? 235
+R:X:ziel:target -22.5
+RBT:<r>_? 235
+RBS:X__ -347
+RBT:<r>_target -22.5
+RBT:?_</r> 235
+RBT:target_</r> -22.5
+R:X:ist:'s -61
+R:X:ziel:targets -18
+RBS:in_</r> -22
+RBT:<r>_targets -18
+RBT:targets_</r> -18
+RBT:<r>_, 24.5
+R:X:ziel:ziel -125
+RBT:,_</r> -38
+R:X:NX___NX:N1___N2 -347
+R:X:dies_NX:so_N1 200
+RBT:<r>_ziel -125
+RBS:dies_X 256
+RBT:ziel_</r> -125
+RBT:<r>_at 23
+R:X:dies_NX:this_to_N1 156.5
+R:X:ziel_NX:goal_N1 49
+RBT:this_to 156.5
+RBS:ziel_X 219
+R:X:dies_NX:this_N1 -100.5
+R:X:ziel_NX:targets_N1 -19
+R:X:dies_ist:could_be 118.5
+R:X:ziel_NX:target_N1 -20
+RBS:dies_ist 92
+R:X:sein_NX:being_able_to_N1 -71.5
+RBT:in_</r> -65.5
+R:X:in:for 31
+RBT:<r>_could 118.5
+RBS:sein_X -68
+RBT:could_be 118.5
+RBT:being_able -63
+RBT:<r>_for 14.5
+RBT:able_to -63
+RBT:for_</r> 14.5
+R:X:sein_NX:be_N1 -10
+R:X:sein_NX:his_N1 184.5
+RBS:X_ist -507.5
+R:X:sein_NX:its_N1 -26.5
+R:X:in:in -53
+R:X:sein_NX:N1_be -174.5
+R:X:NX_ziel:N1_aim -32.5
+RBT:<r>_in -75.5
+RBS:X_ziel 143
+R:X:NX_ziel:N1_goal 20
+R:X:NX_ziel:N1_target -26.5
+R:X:NX_ziel:N1_targets -27
+RBT:<r>_into -270
+R:X:NX_ziel_NX:N1_goal_N2 60
+R:X:NX_ziel_NX:N1_targets_N2 -6
+R:X:NX_sie_NX_,_dass:N1_N2_that 346
+R:X:NX_ziel_NX:N1_target_N2 -6
+R:X:dies_ist_NX:this_is_N1 -26.5
+R:X:NX_ziel_NX:N2_N1_goal 161
+RBT:<r>_of -38
+RBT:of_</r> -17
+R:X:NX_ist_NX:is_N1_N2 -129
+RBS:<r>_die 428.5
+R:X:NX_ist_NX:is_N1_,_N2 16.5
+RBS:die_</r> -116
+RBT:<r>_on -653.5
+RBT:on_</r> 84.5
+R:X:NX_ist_NX:'s_N1_N2 -41.5
+R:X:die:, -9
+RBT:<r>_over 45
+R:X:die:a -5
+R:X:NX_ist_NX:N1_has_N2 -104.5
+R:X:blieben_NX:remained_N1 135
+R:X:die:an -123
+R:X:NX_ist_NX:N1_is_at_N2 -5.5
+RBS:<r>_blieben 187.5
+R:X:NX_ist_NX:N1_is_well_N2 -5
+RBS:blieben_X -13
+RBT:<r>_are -65.5
+RBT:<r>_'s 16
+R:X:NX_ist_NX:N1_is_N2 -31
+RBT:are_</r> -65.5
+RBT:'s_</r> -28.5
+R:X:blieben_NX:N1_remained 81.5
+R:X:NX_ist_NX:N1_,_is_N2 59.5
+R:X:die:by -10
+R:X:die:its 302.5
+RBS:<r>_pakistanis 57
+RBS:pakistanis_</r> 116.5
+RBT:<r>_to 93.5
+RBT:<r>_pakistanis 161
+R:X:NX_ist_NX:N1_N2_has -75
+R:X:die:the -28
+RBT:to_</r> 18
+R:X:NX_ist_NX:N1_N2_is -97.5
+R:X:pakistanis_NX:pakistanis_N1 57
+R:X:NX_ist_NX:N1_,_N2_is -1
+RBT:<r>_those -6
+RBT:<r>_within 20
+RBT:within_</r> 20
+RBS:pakistanis_X -116.5
+R:X:NX_blieben_NX:N1_,_N2_remained -229.5
+R:X:NX_ist_NX:N2_is_N1 -47
+RBS:X_blieben -187.5
+RBT:<unk>_is -21
+R:X:NX_pakistanis:pakistanis_,_N1 235.5
+RBS:X_pakistanis -57
+RBT:pakistanis_, 104
+R:X:NX_pakistanis:N1_pakistanis -119
+R:X:NX_ist_NX:N2_N1_is -46.5
+RBS:blieben_</r> 13
+RBT:<r>_is -251
+R:X:blieben:blieben -29
+RBT:<r>_blieben -29
+RBT:blieben_</r> -29
+R:X:NX_pakistanis_NX:pakistanis_,_N1_,_N2 -23
+RBS:<r>_zu -560
+R:X:NX_pakistanis_NX:N1_pakistanis_N2 -150.5
+RBS:zu_X -717.5
+R:X:NX_blieben:N1_,_remained 42
+RBS:<r>__ 347
+RBS:<r>_ein 37.5
+RBS:ein_</r> -9.5
+RBS:der_</r> -88.5
+R:X:zu_NX:for_N1 43
+R:X:__NX:__N1 -97
+RBT:<r>_- 113
+RBT:-_</r> -4
+R:X:__NX:,_N1 444
+R:X:zu_NX:in_N1 37.5
+RBT:<r>_a -27.5
+RBT:a_</r> -5
+RBS:sie_</r> -346
+RBT:the_</r> 40
+R:X:zu_NX:to_N1 -716
+R:X:zu_NX:with_N1 40.5
+R:X:zu_NX:N1_on 30
+RBT:<r>_the 324.5
+R:X:NX_sie:but_N1 -346
+RBS:X_ein -37.5
+RBT:be_transformed -12
+R:X:medien:media 299.5
+RBS:<r>_medien -71.5
+RBT:<r>_with 54.5
+RBS:medien_</r> -156
+RBT:with_</r> -19
+RBT:<r>_media 299.5
+R:X:NX_ein:N1_has_an -3.5
+RBT:media_</r> 299.5
+R:X:NX_ein:N1_put_forward_a -6
+R:X:medien:medien -371
+RBT:<r>_medien -371
+RBT:medien_</r> -371
+RBS:der_X 45
+RBS:medien_X 156
+R:X:NX_zu_NX:in_N2_N1 -9.5
+RBS:X_zu 339
+RBT:in_<unk> -2.5
+R:X:NX_zu_NX:of_N2_N1 -52.5
+RBT:to_<unk> -102.5
+RBT:<unk>_to 30
+R:X:,_dass_NX:that_N1 317
+R:X:NX_zu_NX:to_N2_N1 19
+R:X:NX_zu_NX:N1_in_N2 -2
+R:X:NX_zu_NX:N1_is_N2 -2
+RBS:X_macht -0.5
+R:X:NX_zu_NX:N1_to_N2 48
+R:X:NX_macht_NX:N1_N2_does -0.5
+R:X:NX_zu_NX:N2_N1_to -28
+R:X:NX_zu_NX_.:to_N2_N1_. 22.5
+RBS:an_</r> 28
+R:X:NX_zu_NX_.:N1_is_N2_. -3.5
+R:X:NX_zu_NX_.:N1_to_N2_. 7.5
+R:X:NX_zu_NX_.:N1_with_N2_. -3
+R:X:NX_zu_NX_.:N1_N2_. -221.5
+R:X:NX_zu_NX_.:N2_N1_. 4.5
+R:X:freien:free -83.5
+RBS:<r>_freien -118
+RBS:freien_</r> -201.5
+RBT:<r>_free 210
+RBT:free_</r> -83.5
+R:X:freien:freien -276
+RBT:<r>_freien -276
+RBT:freien_</r> -276
+RBT:<r>_an 31.5
+R:X:freien_NX:free_N1 248
+RBT:an_</r> -123
+RBS:freien_X 201.5
+R:X:NX_medien:N1_media -90
+RBS:X_medien 71.5
+R:X:amerika:america 193
+RBS:<r>_amerika -36
+R:X:NX_medien_NX:N2_N1_media 5
+R:X:an_NX:in_N1 210
+R:X:freien_NX_.:free_N1_. -6.5
+RBS:amerika_</r> -131
+R:X:NX_medien_NX_.:N2_N1_media_. 151
+RBT:<r>_america 283.5
+RBT:america_</r> 193
+R:X:die_NX:an_N1 -7.5
+R:X:amerika:american -3
+RBS:die_X -45.5
+RBT:<r>_american -3
+RBT:american_</r> -3
+R:X:amerika:amerika -321
+RBS:<r>_jener 62.5
+R:X:die_NX:a_N1 19
+RBT:<r>_amerika -321
+RBS:jener_X 62.5
+RBT:amerika_</r> -321
+R:X:jener_NX:the_N1 62.5
+R:X:an_NX:to_N1 -210
+RBS:X_jener -62.5
+RBS:amerika_X 131
+R:X:amerika_NX:america_N1 107
+R:X:die_NX:is_N1 -2.5
+RBS:an_der -28
+R:X:auf:, -5
+R:X:die_NX:its_N1 -14
+RBS:auf_</r> 46.5
+R:X:die_NX:'s_N1 46.5
+RBS:X_der 71
+R:X:NX_der:N1_for -74
+R:X:NX_der:N1_in -43
+R:X:auf:in -5.5
+RBT:<r>_choice -103
+R:X:die_NX:the_N1 -86.5
+RBT:<r>_decision 103
+R:X:auf:on 60
+R:X:die_NX:those_N1 -6
+R:X:NX_der:N1_to 72
+R:X:entscheidung_NX:choice_is_N1 -103
+R:X:die_NX:with_N1 73.5
+R:X:auf:auf -3
+RBT:choice_is -103
+RBT:<r>_auf -3
+R:X:entscheidung_NX:decision_N1 103
+R:X:die_NX:,_N1 57
+R:X:die_NX:N1_is -0.5
+RBT:auf_</r> -3
+R:X:die_NX:N1_'s -1
+RBS:auf_X -46.5
+R:X:die_NX:N1_the -1
+R:X:NX_freien:N1_free 158
+RBT:of_<unk> -13
+RBS:X_freien 118
+R:X:NX_der_NX:over_N2_N1 45
+R:X:NX_freien_NX:N1_free_N2 -34
+R:X:NX_freien_NX:N1_free_,_N2 -6
+RBT:over_<unk> 45
+R:X:die_NX_medien:the_N1_media 5.5
+R:X:auf_NX:in_N1 -46.5
+RBT:the_<unk> -0.5
+R:X:auf_NX:on_N1 66
+R:X:auf_NX:to_N1 -2
+R:X:auf_NX:,_N1 -18
+RBS:X_amerika 36
+RBT:<r>_may -177
+RBS:und_die 139.5
+RBT:may_</r> -177
+RBT:<r>_<unk> 585.5
+RBT:<r>_would -18.5
+RBS:X_die -568
+RBT:would_</r> -204
+R:X:NX_die:the_N1 34.5
+R:X:NX_amerika_NX:N2_N1_america 36
+R:X:terroranschläge:terrorist -22
+R:X:NX_die:,_N1 -42
+R:X:NX_die:N1_, -173
+RBS:<r>_terroranschläge -161.5
+RBS:der_macht 0.5
+R:X:NX_die:-_N1 -5
+RBS:terroranschläge_</r> -46
+R:X:NX_die:N1_a -1
+RBT:<r>_terrorist -119.5
+R:X:NX_der_macht_NX:N1_hold_N2_power 28
+RBT:terrorist_</r> -22
+R:X:,:, -2.5
+RBT:terrorist_attacks 77.5
+RBS:<r>_, -182
+RBT:attacks_</r> 28
+RBS:,_</r> -160.5
+R:X:terroranschläge:terroranschläge -52
+RBT:<r>_terroranschläge -52
+RBT:<r>__ -139
+RBT:terroranschläge_</r> -52
+R:X:NX_die:N1_its -128.5
+RBS:terroranschläge_X 46
+RBT:<r>_-- -64
+R:X:terroranschläge_NX:terrorist_attacks_N1 -87.5
+RBT:<r>_by -10
+RBT:by_</r> -10
+R:X:,:out -3.5
+RBT:<r>_out -3.5
+R:X:und_die_NX:and_N1 218
+RBT:out_</r> -3.5
+RBT:<r>_that -261.5
+R:X:NX_die_NX:the_N1_N2 -1
+RBT:that_</r> -127.5
+R:X:NX_die_NX:the_N2_N1 -4
+RBS:,_X -335
+RBT:,_as -40
+R:X:,_NX:in_N1 -239
+R:X:,_NX:of_N1 -4
+R:X:,_NX:on_N1 -166
+R:X:,_NX:to_N1 649
+R:X:NX_die_NX:N1_the_N2 -4
+R:X:,_NX:,_N1 -399
+R:X:,_NX:__N1 -42
+R:X:,_NX:--_N1 -102
+R:X:,_an:to 28
+RBS:,_an 28
+R:X:NX_die_NX:N1_,_N2 -5
+R:X:NX_die_NX:N1_N2_the -4
+RBS:X_an -28
+RBS:die_terroranschläge 161.5
+R:X:die_terroranschläge:,_terrorist_attacks 28
+RBT:,_terrorist 175
+R:X:die_terroranschläge_NX:,_terrorist_attacks_N1 147
+R:X:NX_so:N1_as -1.5
+R:X:justiz:judiciary -90
+RBS:<r>_justiz -1
+RBS:justiz_</r> -220.5
+R:X:NX_so:N1_that -14
+RBT:<r>_judiciary 215
+R:X:NX_so:N1_the 15.5
+RBT:judiciary_</r> -90
+R:X:justiz:justiz -216
+RBT:<r>_justiz -216
+RBT:justiz_</r> -216
+R:X:justiz_NX:judiciary_N1 305
+RBS:justiz_X 205
+RBS:<r>_brachten -28
+RBS:justiz_und 15.5
+RBS:brachten_</r> -175
+R:X:NX_und_die:'s_N1_and -5
+R:X:brachten:brachten -175
+RBT:<r>_brachten -175
+RBT:brachten_</r> -175
+R:X:NX_an_der:N1_the -0.5
+R:X:brachten_NX:N1_brought 147
+RBS:brachten_X 175
+R:X:NX_die_terroranschläge_NX:,_terrorist_attacks_N2_N1 -13.5
+R:X:NX_und_die:N1_'s -12
+R:X:NX_und_die_NX:'s_N2_N1 -16
+RBS:<r>_2001 -14.5
+RBS:2001_</r> 28
+RBT:<r>_2001 37.5
+R:X:NX_und_die_NX:N1_and_N2 -159
+RBT:2001_</r> 28
+R:X:2001_NX:2001_N1 147
+RBS:2001_X -28
+R:X:NX_brachten_NX:N1_N2_brought 28
+RBS:X_brachten 28
+RBT:,_<unk> -109.5
+R:X:2001_NX_die_NX:2001_,_N2_N1 -161.5
+R:X:unabhängige:independent 38
+RBT:2001_, -109.5
+RBS:<r>_unabhängige 127
+RBS:unabhängige_</r> -197
+RBT:<r>_independent 343
+RBT:independent_</r> 38
+RBT:<r>_september -13.5
+R:X:unabhängige:unabhängige -198
+RBT:<r>_unabhängige -198
+RBS:ein_X 9.5
+RBT:unabhängige_</r> -198
+RBS:september_X -14.5
+R:X:unabhängige_NX:independent_N1 287
+R:X:ein_NX:an_N1 132
+R:X:ein_NX:any_N1 25
+RBS:unabhängige_X 197
+R:X:NX_justiz:N1_judiciary 85.5
+R:X:NX_an_der_macht_NX:N1_of_power_N2 -27.5
+RBS:X_justiz 1
+R:X:NX_justiz_NX:N1_judiciary_N2 -43
+R:X:NX_justiz_und:N1_judiciary_and 15.5
+RBS:<r>_11 -13.5
+R:X:NX_unabhängige:N1_independent -37
+R:X:ein_NX:a_N1 -93
+RBS:X_unabhängige -127
+R:X:ein_NX:one_N1 -15
+R:X:NX_unabhängige_NX:N1_independent_N2 -90
+R:X:ein_NX:-_N1 -11.5
+R:X:NX_ein_NX:an_N1_N2 -6
+R:X:NX_ein_NX:be_transformed_N1_N2 -22
+RBS:X_, -3.5
+RBS:september_2001 14.5
+RBT:,_2001 14.5
+R:X:NX_,:to_N1 68
+R:X:NX_,:N1__ 1
+R:X:NX_,:N1_-- -172.5
+R:X:11_._september_2001_NX:september_11_,_2001_N1 -13.5
+R:X:die_NX_und_NX:the_N1_N2 -10
+R:X:NX_,:N1_for -127.5
+R:X:NX_,:N1_in -13.5
+R:X:NX_,:N1_of -55
+R:X:NX_,:N1_on 257.5
+R:X:NX_,:N1_out -58
+RBS:am_11 13.5
+R:X:die_NX_justiz_NX_die:the_N1_judiciary_N2 -57
+R:X:NX_,:N1_refuses_to -232.5
+R:X:die_NX_und_die:the_N1_and 148
+R:X:die_NX_und_die:the_N1_and_the -2.5
+RBT:the_september 13.5
+R:X:die_NX_die_NX:the_N1_N2 -3
+R:X:am_11_._september_NX:the_september_11_,_N1 -14.5
+R:X:die_NX_und_die_NX:the_N1_and_N2 -32
+RBS:zu_</r> 672
+R:X:NX_,_NX:N1_,_N2 -78
+R:X:NX_,_NX:N1_N2_, 80
+R:X:am_11_._september_2001:the_september_11_,_2001 28
+R:X:zu:for -5
+R:X:zu:in -7
+R:X:zu:to 23
+R:X:taliban:taliban -251.5
+RBS:<r>_taliban -223.5
+RBS:taliban_</r> -157.5
+R:X:zu:with -6
+R:X:verzweifelten:desperate 28.5
+RBT:<r>_taliban -205.5
+RBT:<r>_desperate 28.5
+RBT:taliban_</r> -107
+RBT:desperate_</r> 28.5
+R:X:taliban_NX:taliban_N1 28
+R:X:verzweifelten:verzweifelten -28.5
+RBS:taliban_X 157.5
+R:X:NX_zu:to_N1 -229
+RBT:<r>_verzweifelten -28.5
+R:X:den_taliban:the_taliban 144.5
+RBT:verzweifelten_</r> -28.5
+RBS:den_taliban 223.5
+RBT:the_taliban 144.5
+R:X:NX_zu:N1_for -152
+R:X:NX_zu:N1_in -6
+R:X:NX_zu:N1_is 251
+R:X:NX_zu:N1_of -49.5
+RBS:<r>_dem 22
+RBT:<r>_its 458
+RBT:its_</r> 337
+R:X:NX_den_taliban:N1_taliban -50.5
+R:X:NX_den_taliban_NX:N1_taliban_N2 -2.5
+R:X:NX_den_taliban_NX:N2_N1_taliban 132
+R:X:erklärte:declared -8
+RBS:<r>_erklärte -185.5
+RBS:erklärte_</r> -124.5
+RBT:<r>_declaring -9
+R:X:erklärte:erklärte -116.5
+RBT:<r>_erklärte -116.5
+RBT:erklärte_</r> -116.5
+R:X:erklärte_NX:declared_N1 -52
+RBS:erklärte_X -61
+RBS:jener_</r> -62.5
+R:X:erklärte_NX:declaring_N1 -9
+RBS:erklärte_, 185.5
+R:X:NX_jener:N1_of -62.5
+R:X:dem_NX:the_N1 22
+R:X:verkaufen:sell -153
+RBS:<r>_verkaufen -153
+RBS:verkaufen_</r> -140.5
+RBT:sell_</r> -153
+RBS:bereit_</r> 86
+RBS:zu_verkaufen 153
+RBS:<r>_bemühen -2.5
+R:X:bereit:bereit 86
+RBT:<r>_bereit 86
+RBT:bereit_</r> 86
+R:X:bereit_NX:ready_N1 -31
+RBS:bereit_X -86
+R:X:bereit_NX:N1_ready -55
+RBS:X_zum 30
+R:X:bemühen:bemühen -2.5
+R:X:NX_erklärte_,:N1_, 110
+RBT:<r>_bemühen -2.5
+RBS:X_erklärte 185.5
+RBT:bemühen_</r> -2.5
+R:X:NX_erklärte_,_NX:N1_,_N2 75.5
+RBS:in_X 22
+RBS:<r>_sich -17.5
+R:X:NX_zu_verkaufen:sell_N1 12.5
+RBS:sich_</r> -17.5
+R:X:NX_zum_NX:N2_to_further_N1 30
+RBS:<r>_das 45
+RBS:das_</r> 2.5
+RBT:to_further 30
+RBT:<r>_it -381
+RBT:it_</r> 3
+RBT:<r>_so 172.5
+RBT:so_</r> -74.5
+RBT:<r>_this 9.5
+RBT:this_</r> -11.5
+RBS:X_dem -22
+R:X:das_NX:a_growing_N1 77
+RBS:das_X -2.5
+RBT:a_growing -41
+R:X:das_NX:be_N1 169
+R:X:das_NX:its_N1 -95
+R:X:das_NX:so_N1 -38
+RBS:X_sein 91.5
+R:X:das_NX:the_N1 -80
+done
+
+---
+Best iteration: 2 [SCORE 'stupid_bleu'=0.37119].
+This took 0.6 min.
diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz
new file mode 100644
index 00000000..0741fd92
--- /dev/null
+++ b/training/dtrain/examples/standard/nc-wmt11.de.gz
Binary files differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz
new file mode 100644
index 00000000..1c0bd401
--- /dev/null
+++ b/training/dtrain/examples/standard/nc-wmt11.en.gz
Binary files differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
new file mode 100644
index 00000000..7ce81057
--- /dev/null
+++ b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
Binary files differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz
new file mode 100644
index 00000000..ce4024a1
--- /dev/null
+++ b/training/dtrain/examples/standard/nc-wmt11.grammar.gz
Binary files differ
diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
index 98b02d44..b14f4819 100644
--- a/training/dtrain/test/toy/cdec.ini
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -1,2 +1,3 @@
formalism=scfg
add_pass_through_rules=true
+grammar=grammar.gz
diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
index a091732f..cd715f26 100644
--- a/training/dtrain/test/toy/dtrain.ini
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -1,5 +1,6 @@
-decoder_config=test/toy/cdec.ini
-input=test/toy/input
+decoder_config=cdec.ini
+input=src
+refs=tgt
output=-
print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
k=4
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output
new file mode 100644
index 00000000..1da2aadd
--- /dev/null
+++ b/training/dtrain/examples/toy/expected-output
@@ -0,0 +1,77 @@
+Warning: hi_lo only works with pair_sampling XYX.
+ cdec cfg 'cdec.ini'
+Seeding random number sequence to 1664825829
+
+dtrain
+Parameters:
+ k 4
+ N 4
+ T 2
+ scorer 'bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 1
+ gamma 0
+ loss margin 0
+ pairs 'all'
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'src'
+ refs 'tgt'
+ output '-'
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ 2
+WEIGHTS
+ logp = +0
+ shell_rule = -1
+ house_rule = +2
+ small_rule = -2
+ little_rule = +3
+ PassThrough = -5
+ ---
+ 1best avg score: 0.5 (+0.5)
+ 1best avg model score: 2.5 (+2.5)
+ avg # pairs: 4
+ avg # rank err: 1.5
+ avg # margin viol: 0
+ non0 feature count: 6
+ avg list sz: 4
+ avg f count: 2.875
+(time 0 min, 0 s/S)
+
+Iteration #2 of 2.
+ 2
+WEIGHTS
+ logp = +0
+ shell_rule = -1
+ house_rule = +2
+ small_rule = -2
+ little_rule = +3
+ PassThrough = -5
+ ---
+ 1best avg score: 1 (+0.5)
+ 1best avg model score: 5 (+2.5)
+ avg # pairs: 5
+ avg # rank err: 0
+ avg # margin viol: 0
+ non0 feature count: 6
+ avg list sz: 4
+ avg f count: 3
+(time 0 min, 0 s/S)
+
+Writing weights file to '-' ...
+house_rule 2
+little_rule 3
+Glue -4
+PassThrough -5
+small_rule -2
+shell_rule -1
+done
+
+---
+Best iteration: 2 [SCORE 'bleu'=1].
+This took 0 min.
diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz
new file mode 100644
index 00000000..8eb0d29e
--- /dev/null
+++ b/training/dtrain/examples/toy/grammar.gz
Binary files differ
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src
new file mode 100644
index 00000000..87e39ef2
--- /dev/null
+++ b/training/dtrain/examples/toy/src
@@ -0,0 +1,2 @@
+ich sah ein kleines haus
+ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt
new file mode 100644
index 00000000..174926b3
--- /dev/null
+++ b/training/dtrain/examples/toy/tgt
@@ -0,0 +1,2 @@
+i saw a little house
+i found a little house
diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb
index f0cd58c5..86e835e8 100755
--- a/training/dtrain/lplp.rb
+++ b/training/dtrain/lplp.rb
@@ -84,34 +84,28 @@ def _test()
end
#_test()
-# actually do something
+
def usage()
- puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>"
+ puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>"
puts " l0...: norms for selection"
puts "select_k: only output top k (according to the norm of their column vector) features"
puts " cut: output features with weight >= threshold"
puts " n: if we do not have a shard count use this number for averaging"
- exit
+ exit 1
end
-if ARGV.size < 3 then usage end
+if ARGV.size < 4 then usage end
norm_fun = method(ARGV[0].to_sym)
type = ARGV[1]
x = ARGV[2].to_f
-
-shard_count_key = "__SHARD_COUNT__"
+shard_count = ARGV[3].to_f
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
w = {}
-shard_count = 0
while line = STDIN.gets
key, val = line.split /\s+/
- if key == shard_count_key
- shard_count += 1
- next
- end
if w.has_key? key
w[key].push val.to_f
else
@@ -119,8 +113,6 @@ while line = STDIN.gets
end
end
-if ARGV.size == 4 then shard_count = ARGV[3].to_f end
-
if type == 'cut'
cut(w, norm_fun, shard_count, x)
elsif type == 'select_k'
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 24e7f49e..e661416e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -3,16 +3,15 @@
require 'trollop'
def usage
- if ARGV.size != 8
- STDERR.write "Usage: "
- STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n"
- exit 1
- end
+ STDERR.write "Usage: "
+ STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n"
+ exit 1
end
opts = Trollop::options do
opt :config, "dtrain config file", :type => :string
- opt :epochs, "number of epochs", :type => :int
+ opt :epochs, "number of epochs", :type => :int, :default => 10
+ opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000"
opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false
opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false
opt :shards, "number of shards", :type => :int
@@ -21,8 +20,8 @@ opts = Trollop::options do
opt :references, "references", :type => :string
opt :qsub, "use qsub", :type => :bool, :default => false
opt :dtrain_binary, "path to dtrain binary", :type => :string
- opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000"
end
+usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
dtrain_dir = File.expand_path File.dirname(__FILE__)
@@ -32,16 +31,14 @@ else
dtrain_bin = opts[:dtrain_binary]
end
ruby = '/usr/bin/ruby'
-lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb"
+lplp_rb = "#{dtrain_dir}/lplp.rb"
lplp_args = opts[:lplp_args]
cat = '/bin/cat'
ini = opts[:config]
epochs = opts[:epochs]
-rand = false
-rand = true if opts[:randomize]
-reshard = false
-reshard = true if opts[:reshard]
+rand = opts[:randomize]
+reshard = opts[:reshard]
predefined_shards = false
if opts[:shards] == 0
predefined_shards = true
@@ -49,11 +46,10 @@ if opts[:shards] == 0
else
num_shards = opts[:shards]
end
-shards_at_once = opts[:processes_at_once]
input = opts[:input]
refs = opts[:references]
-use_qsub = false
-use_qsub = true if opts[:qsub]
+use_qsub = opts[:qsub]
+shards_at_once = opts[:processes_at_once]
`mkdir work`
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
deleted file mode 100644
index 2df77086..00000000
--- a/training/dtrain/test/example/README
+++ /dev/null
@@ -1,8 +0,0 @@
-Small example of input format for distributed training.
-Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini .
-
-For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
-and recompile.
-
-data can be found here: http://simianer.de/#dtrain
-
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
deleted file mode 100644
index 97fce7f0..00000000
--- a/training/dtrain/test/example/dtrain.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-input=./nc-wmt11.1k.gz # use '-' for STDIN
-output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
-select_weights=VOID # don't output weights
-decoder_config=./cdec.ini # config for cdec
-# weights for these features will be printed on each iteration
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-stop_after=10 # stop epoch after 10 inputs
-
-# interesting stuff
-epochs=2 # run over input 2 times
-k=100 # use 100best lists
-N=4 # optimize (approx) BLEU4
-scorer=stupid_bleu # use 'stupid' BLEU+1
-learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron)
-gamma=0 # use SVM reg
-sample_from=kbest # use kbest lists (as opposed to forest)
-filter=uniq # only unique entries in kbest (surface form)
-pair_sampling=XYX
-hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
-pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0)
-loss_margin=0
diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output
deleted file mode 100644
index 05326763..00000000
--- a/training/dtrain/test/example/expected-output
+++ /dev/null
@@ -1,89 +0,0 @@
- cdec cfg 'test/example/cdec.ini'
-Loading the LM will be faster if you build a binary file.
-Reading test/example/nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
- Example feature: Shape_S00000_T00000
-Seeding random number sequence to 2912000813
-
-dtrain
-Parameters:
- k 100
- N 4
- T 2
- scorer 'stupid_bleu'
- sample from 'kbest'
- filter 'uniq'
- learning rate 1
- gamma 0
- loss margin 0
- pairs 'XYX'
- hi lo 0.1
- pair threshold 0
- select weights 'VOID'
- l1 reg 0 'none'
- max pairs 4294967295
- cdec cfg 'test/example/cdec.ini'
- input 'test/example/nc-wmt11.1k.gz'
- output '-'
- stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 2.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
- Glue = -637
- WordPenalty = +1064
- LanguageModel = +1175.3
- LanguageModel_OOV = -1437
- PhraseModel_0 = +1935.6
- PhraseModel_1 = +2499.3
- PhraseModel_2 = +964.96
- PhraseModel_3 = +1410.8
- PhraseModel_4 = -5977.9
- PhraseModel_5 = +522
- PhraseModel_6 = +1089
- PassThrough = -1308
- ---
- 1best avg score: 0.16963 (+0.16963)
- 1best avg model score: 64485 (+64485)
- avg # pairs: 1494.4
- avg # rank err: 702.6
- avg # margin viol: 0
- non0 feature count: 528
- avg list sz: 85.7
- avg f count: 102.75
-(time 0.083 min, 0.5 s/S)
-
-Iteration #2 of 2.
- . 10
-WEIGHTS
- Glue = -1196
- WordPenalty = +809.52
- LanguageModel = +3112.1
- LanguageModel_OOV = -1464
- PhraseModel_0 = +3895.5
- PhraseModel_1 = +4683.4
- PhraseModel_2 = +1092.8
- PhraseModel_3 = +1079.6
- PhraseModel_4 = -6827.7
- PhraseModel_5 = -888
- PhraseModel_6 = +142
- PassThrough = -1335
- ---
- 1best avg score: 0.277 (+0.10736)
- 1best avg model score: -3110.5 (-67595)
- avg # pairs: 1144.2
- avg # rank err: 529.1
- avg # margin viol: 0
- non0 feature count: 859
- avg list sz: 74.9
- avg f count: 112.84
-(time 0.067 min, 0.4 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 2 [SCORE 'stupid_bleu'=0.277].
-This took 0.15 min.
diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in
deleted file mode 100644
index 3b7dec39..00000000
--- a/training/dtrain/test/parallelize/in
+++ /dev/null
@@ -1,10 +0,0 @@
-<seg grammar="g/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg>
-<seg grammar="g/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg>
-<seg grammar="g/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg>
-<seg grammar="g/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg>
-<seg grammar="g/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg>
-<seg grammar="g/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg>
-<seg grammar="g/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg>
-<seg grammar="g/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg>
-<seg grammar="g/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg>
-<seg grammar="g/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg>
diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input
deleted file mode 100644
index 4d10a9ea..00000000
--- a/training/dtrain/test/toy/input
+++ /dev/null
@@ -1,2 +0,0 @@
-0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0
-1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0