From 2a48d73eb794fdd736d1df035c8a31af887cde0a Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 15 Mar 2013 11:31:18 +0100
Subject: overhauled ruby scripts and examples
---
training/dtrain/dtrain.cc | 2 -
training/dtrain/examples/parallelized/README | 5 +
training/dtrain/examples/parallelized/cdec.ini | 22 +
training/dtrain/examples/parallelized/dtrain.ini | 16 +
.../examples/parallelized/grammar/grammar.out.0.gz | Bin 0 -> 8318 bytes
.../examples/parallelized/grammar/grammar.out.1.gz | Bin 0 -> 358560 bytes
.../examples/parallelized/grammar/grammar.out.2.gz | Bin 0 -> 1014466 bytes
.../examples/parallelized/grammar/grammar.out.3.gz | Bin 0 -> 391811 bytes
.../examples/parallelized/grammar/grammar.out.4.gz | Bin 0 -> 149590 bytes
.../examples/parallelized/grammar/grammar.out.5.gz | Bin 0 -> 537024 bytes
.../examples/parallelized/grammar/grammar.out.6.gz | Bin 0 -> 291286 bytes
.../examples/parallelized/grammar/grammar.out.7.gz | Bin 0 -> 1038140 bytes
.../examples/parallelized/grammar/grammar.out.8.gz | Bin 0 -> 419889 bytes
.../examples/parallelized/grammar/grammar.out.9.gz | Bin 0 -> 409140 bytes
training/dtrain/examples/parallelized/in | 10 +
training/dtrain/examples/parallelized/refs | 10 +
training/dtrain/examples/parallelized/work/out.0.0 | 61 +
training/dtrain/examples/parallelized/work/out.0.1 | 62 +
training/dtrain/examples/parallelized/work/out.1.0 | 61 +
training/dtrain/examples/parallelized/work/out.1.1 | 62 +
.../dtrain/examples/parallelized/work/shard.0.0.in | 5 +
.../examples/parallelized/work/shard.0.0.refs | 5 +
.../dtrain/examples/parallelized/work/shard.1.0.in | 5 +
.../examples/parallelized/work/shard.1.0.refs | 5 +
.../dtrain/examples/parallelized/work/weights.0 | 12 +
.../dtrain/examples/parallelized/work/weights.0.0 | 12 +
.../dtrain/examples/parallelized/work/weights.0.1 | 12 +
.../dtrain/examples/parallelized/work/weights.1 | 12 +
.../dtrain/examples/parallelized/work/weights.1.0 | 11 +
.../dtrain/examples/parallelized/work/weights.1.1 | 12 +
training/dtrain/examples/standard/README | 2 +
training/dtrain/examples/standard/cdec.ini | 26 +
training/dtrain/examples/standard/dtrain.ini | 24 +
training/dtrain/examples/standard/expected-output | 1206 ++++++++++++++++++++
training/dtrain/examples/standard/nc-wmt11.de.gz | Bin 0 -> 58324 bytes
training/dtrain/examples/standard/nc-wmt11.en.gz | Bin 0 -> 49600 bytes
.../dtrain/examples/standard/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes
.../dtrain/examples/standard/nc-wmt11.grammar.gz | Bin 0 -> 1399924 bytes
training/dtrain/examples/toy/cdec.ini | 3 +
training/dtrain/examples/toy/dtrain.ini | 13 +
training/dtrain/examples/toy/expected-output | 77 ++
training/dtrain/examples/toy/grammar.gz | Bin 0 -> 219 bytes
training/dtrain/examples/toy/src | 2 +
training/dtrain/examples/toy/tgt | 2 +
training/dtrain/lplp.rb | 18 +-
training/dtrain/parallelize.rb | 26 +-
training/dtrain/test/example/README | 8 -
training/dtrain/test/example/cdec.ini | 25 -
training/dtrain/test/example/dtrain.ini | 22 -
training/dtrain/test/example/expected-output | 89 --
training/dtrain/test/parallelize/README | 5 -
training/dtrain/test/parallelize/cdec.ini | 22 -
training/dtrain/test/parallelize/dtrain.ini | 15 -
.../dtrain/test/parallelize/g/grammar.out.0.gz | Bin 8318 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.1.gz | Bin 358560 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.2.gz | Bin 1014466 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.3.gz | Bin 391811 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.4.gz | Bin 149590 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.5.gz | Bin 537024 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.6.gz | Bin 291286 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.7.gz | Bin 1038140 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.8.gz | Bin 419889 -> 0 bytes
.../dtrain/test/parallelize/g/grammar.out.9.gz | Bin 409140 -> 0 bytes
training/dtrain/test/parallelize/in | 10 -
training/dtrain/test/parallelize/refs | 10 -
training/dtrain/test/toy/cdec.ini | 2 -
training/dtrain/test/toy/dtrain.ini | 12 -
training/dtrain/test/toy/input | 2 -
68 files changed, 1771 insertions(+), 252 deletions(-)
create mode 100644 training/dtrain/examples/parallelized/README
create mode 100644 training/dtrain/examples/parallelized/cdec.ini
create mode 100644 training/dtrain/examples/parallelized/dtrain.ini
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.0.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.1.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.2.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.3.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.4.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.5.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.6.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.7.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.8.gz
create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.9.gz
create mode 100644 training/dtrain/examples/parallelized/in
create mode 100644 training/dtrain/examples/parallelized/refs
create mode 100644 training/dtrain/examples/parallelized/work/out.0.0
create mode 100644 training/dtrain/examples/parallelized/work/out.0.1
create mode 100644 training/dtrain/examples/parallelized/work/out.1.0
create mode 100644 training/dtrain/examples/parallelized/work/out.1.1
create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.in
create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.refs
create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.in
create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.refs
create mode 100644 training/dtrain/examples/parallelized/work/weights.0
create mode 100644 training/dtrain/examples/parallelized/work/weights.0.0
create mode 100644 training/dtrain/examples/parallelized/work/weights.0.1
create mode 100644 training/dtrain/examples/parallelized/work/weights.1
create mode 100644 training/dtrain/examples/parallelized/work/weights.1.0
create mode 100644 training/dtrain/examples/parallelized/work/weights.1.1
create mode 100644 training/dtrain/examples/standard/README
create mode 100644 training/dtrain/examples/standard/cdec.ini
create mode 100644 training/dtrain/examples/standard/dtrain.ini
create mode 100644 training/dtrain/examples/standard/expected-output
create mode 100644 training/dtrain/examples/standard/nc-wmt11.de.gz
create mode 100644 training/dtrain/examples/standard/nc-wmt11.en.gz
create mode 100644 training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
create mode 100644 training/dtrain/examples/standard/nc-wmt11.grammar.gz
create mode 100644 training/dtrain/examples/toy/cdec.ini
create mode 100644 training/dtrain/examples/toy/dtrain.ini
create mode 100644 training/dtrain/examples/toy/expected-output
create mode 100644 training/dtrain/examples/toy/grammar.gz
create mode 100644 training/dtrain/examples/toy/src
create mode 100644 training/dtrain/examples/toy/tgt
delete mode 100644 training/dtrain/test/example/README
delete mode 100644 training/dtrain/test/example/cdec.ini
delete mode 100644 training/dtrain/test/example/dtrain.ini
delete mode 100644 training/dtrain/test/example/expected-output
delete mode 100644 training/dtrain/test/parallelize/README
delete mode 100644 training/dtrain/test/parallelize/cdec.ini
delete mode 100644 training/dtrain/test/parallelize/dtrain.ini
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.0.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.1.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.2.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.3.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.4.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.5.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.6.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.7.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.8.gz
delete mode 100644 training/dtrain/test/parallelize/g/grammar.out.9.gz
delete mode 100644 training/dtrain/test/parallelize/in
delete mode 100644 training/dtrain/test/parallelize/refs
delete mode 100644 training/dtrain/test/toy/cdec.ini
delete mode 100644 training/dtrain/test/toy/dtrain.ini
delete mode 100644 training/dtrain/test/toy/input
(limited to 'training/dtrain')
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index dfb5b351..fcb46db2 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -254,8 +254,6 @@ main(int argc, char** argv)
time_t start, end;
time(&start);
- igzstream grammar_buf_in;
- if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str());
score_t score_sum = 0.;
score_t model_sum(0);
unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0;
diff --git a/training/dtrain/examples/parallelized/README b/training/dtrain/examples/parallelized/README
new file mode 100644
index 00000000..89715105
--- /dev/null
+++ b/training/dtrain/examples/parallelized/README
@@ -0,0 +1,5 @@
+run for example
+ ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
+
+final weights will be in the file work/weights.3
+
diff --git a/training/dtrain/examples/parallelized/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini
new file mode 100644
index 00000000..e43ba1c4
--- /dev/null
+++ b/training/dtrain/examples/parallelized/cdec.ini
@@ -0,0 +1,22 @@
+formalism=scfg
+add_pass_through_rules=true
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+scfg_max_span_limit=15
+feature_function=WordPenalty
+feature_function=KLanguageModel ../standard/nc-wmt11.en.srilm.gz
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+#feature_function=RuleIdentityFeatures
+#feature_function=RuleNgramFeatures
+#feature_function=RuleShape
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
diff --git a/training/dtrain/examples/parallelized/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini
new file mode 100644
index 00000000..f19ef891
--- /dev/null
+++ b/training/dtrain/examples/parallelized/dtrain.ini
@@ -0,0 +1,16 @@
+k=100
+N=4
+learning_rate=0.0001
+gamma=0
+loss_margin=1.0
+epochs=1
+scorer=stupid_bleu
+sample_from=kbest
+filter=uniq
+pair_sampling=XYX
+hi_lo=0.1
+select_weights=last
+print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+# newer versions of the grammar extractor use different feature names:
+#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+decoder_config=cdec.ini
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz
new file mode 100644
index 00000000..1e28a24b
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz
new file mode 100644
index 00000000..372f5675
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz
new file mode 100644
index 00000000..145d0dc0
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz
new file mode 100644
index 00000000..105593ff
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz
new file mode 100644
index 00000000..30781f48
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz
new file mode 100644
index 00000000..834ee759
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz
new file mode 100644
index 00000000..2e76f348
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz
new file mode 100644
index 00000000..3741a887
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz
new file mode 100644
index 00000000..ebf6bd0c
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz differ
diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz
new file mode 100644
index 00000000..c1791059
Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz differ
diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in
new file mode 100644
index 00000000..51d01fe7
--- /dev/null
+++ b/training/dtrain/examples/parallelized/in
@@ -0,0 +1,10 @@
+europas nach rassen geteiltes haus
+ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .
+der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .
+während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .
+eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .
+die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .
+das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .
+die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .
+der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .
+genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .
diff --git a/training/dtrain/examples/parallelized/refs b/training/dtrain/examples/parallelized/refs
new file mode 100644
index 00000000..632e27b0
--- /dev/null
+++ b/training/dtrain/examples/parallelized/refs
@@ -0,0 +1,10 @@
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0
new file mode 100644
index 00000000..7a00ed0f
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.0.0
@@ -0,0 +1,61 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 3121929377
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.0.0.in'
+ refs 'work/shard.0.0.refs'
+ output 'work/weights.0.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = +0.2663
+ WordPenalty = -0.0079042
+ LanguageModel = +0.44782
+ LanguageModel_OOV = -0.0401
+ PhraseModel_0 = -0.193
+ PhraseModel_1 = +0.71321
+ PhraseModel_2 = +0.85196
+ PhraseModel_3 = -0.43986
+ PhraseModel_4 = -0.44803
+ PhraseModel_5 = -0.0538
+ PhraseModel_6 = -0.1788
+ PassThrough = -0.1477
+ ---
+ 1best avg score: 0.17521 (+0.17521)
+ 1best avg model score: 21.556 (+21.556)
+ avg # pairs: 1671.2
+ avg # rank err: 1118.6
+ avg # margin viol: 552.6
+ non0 feature count: 12
+ avg list sz: 100
+ avg f count: 11.32
+(time 0.37 min, 4.4 s/S)
+
+Writing weights file to 'work/weights.0.0' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.17521].
+This took 0.36667 min.
diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1
new file mode 100644
index 00000000..e2bd6649
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.0.1
@@ -0,0 +1,62 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 2767202922
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.0.0.in'
+ refs 'work/shard.0.0.refs'
+ output 'work/weights.0.1'
+ weights in 'work/weights.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = -0.2699
+ WordPenalty = +0.080605
+ LanguageModel = -0.026572
+ LanguageModel_OOV = -0.30025
+ PhraseModel_0 = -0.32076
+ PhraseModel_1 = +0.67451
+ PhraseModel_2 = +0.92
+ PhraseModel_3 = -0.36402
+ PhraseModel_4 = -0.592
+ PhraseModel_5 = -0.0269
+ PhraseModel_6 = -0.28755
+ PassThrough = -0.33285
+ ---
+ 1best avg score: 0.26638 (+0.26638)
+ 1best avg model score: 53.197 (+53.197)
+ avg # pairs: 2028.6
+ avg # rank err: 998.2
+ avg # margin viol: 918.8
+ non0 feature count: 12
+ avg list sz: 100
+ avg f count: 10.496
+(time 0.32 min, 3.8 s/S)
+
+Writing weights file to 'work/weights.0.1' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.26638].
+This took 0.31667 min.
diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0
new file mode 100644
index 00000000..6e790e38
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.1.0
@@ -0,0 +1,61 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 1432415010
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.1.0.in'
+ refs 'work/shard.1.0.refs'
+ output 'work/weights.1.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = -0.3815
+ WordPenalty = +0.20064
+ LanguageModel = +0.95304
+ LanguageModel_OOV = -0.264
+ PhraseModel_0 = -0.22362
+ PhraseModel_1 = +0.12254
+ PhraseModel_2 = +0.26328
+ PhraseModel_3 = +0.38018
+ PhraseModel_4 = -0.48654
+ PhraseModel_5 = +0
+ PhraseModel_6 = -0.3645
+ PassThrough = -0.2216
+ ---
+ 1best avg score: 0.10863 (+0.10863)
+ 1best avg model score: -4.9841 (-4.9841)
+ avg # pairs: 1345.4
+ avg # rank err: 822.4
+ avg # margin viol: 501
+ non0 feature count: 11
+ avg list sz: 100
+ avg f count: 11.814
+(time 0.45 min, 5.4 s/S)
+
+Writing weights file to 'work/weights.1.0' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.10863].
+This took 0.45 min.
diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1
new file mode 100644
index 00000000..0b984761
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/out.1.1
@@ -0,0 +1,62 @@
+ cdec cfg 'cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ../example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Seeding random number sequence to 1771918374
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 1
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 1
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'work/shard.1.0.in'
+ refs 'work/shard.1.0.refs'
+ output 'work/weights.1.1'
+ weights in 'work/weights.0'
+(a dot represents 10 inputs)
+Iteration #1 of 1.
+ 5
+WEIGHTS
+ Glue = -0.3178
+ WordPenalty = +0.11092
+ LanguageModel = +0.17269
+ LanguageModel_OOV = -0.13485
+ PhraseModel_0 = -0.45371
+ PhraseModel_1 = +0.38789
+ PhraseModel_2 = +0.75311
+ PhraseModel_3 = -0.38163
+ PhraseModel_4 = -0.58817
+ PhraseModel_5 = -0.0269
+ PhraseModel_6 = -0.27315
+ PassThrough = -0.16745
+ ---
+ 1best avg score: 0.13169 (+0.13169)
+ 1best avg model score: 24.226 (+24.226)
+ avg # pairs: 1951.2
+ avg # rank err: 985.4
+ avg # margin viol: 951
+ non0 feature count: 12
+ avg list sz: 100
+ avg f count: 11.224
+(time 0.42 min, 5 s/S)
+
+Writing weights file to 'work/weights.1.1' ...
+done
+
+---
+Best iteration: 1 [SCORE 'stupid_bleu'=0.13169].
+This took 0.41667 min.
diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in
new file mode 100644
index 00000000..92f9c78e
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.0.0.in
@@ -0,0 +1,5 @@
+europas nach rassen geteiltes haus
+ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .
+der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .
+während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .
+eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .
diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs
new file mode 100644
index 00000000..bef68fee
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs
@@ -0,0 +1,5 @@
+europe 's divided racial house
+a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
+the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
+while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
+an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in
new file mode 100644
index 00000000..b7695ce7
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.1.0.in
@@ -0,0 +1,5 @@
+die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .
+das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .
+die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .
+der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .
+genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .
diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs
new file mode 100644
index 00000000..6076f6d5
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs
@@ -0,0 +1,5 @@
+mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
+it will not , as america 's racial history clearly shows .
+race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
+the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
+this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0
new file mode 100644
index 00000000..ddd595a8
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0
@@ -0,0 +1,12 @@
+LanguageModel 0.7004298992212881
+PhraseModel_2 0.5576194336478857
+PhraseModel_1 0.41787318415343155
+PhraseModel_4 -0.46728502545635164
+PhraseModel_3 -0.029839521598455515
+Glue -0.05760000000000068
+PhraseModel_6 -0.2716499999999978
+PhraseModel_0 -0.20831031065605327
+LanguageModel_OOV -0.15205000000000077
+PassThrough -0.1846500000000006
+WordPenalty 0.09636994553433414
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0
new file mode 100644
index 00000000..c9370b18
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0.0
@@ -0,0 +1,12 @@
+WordPenalty -0.0079041595706392243
+LanguageModel 0.44781580828279532
+LanguageModel_OOV -0.04010000000000042
+Glue 0.26629999999999948
+PhraseModel_0 -0.19299677809125185
+PhraseModel_1 0.71321026861732773
+PhraseModel_2 0.85195540993310537
+PhraseModel_3 -0.43986310822842656
+PhraseModel_4 -0.44802855630415955
+PhraseModel_5 -0.053800000000000514
+PhraseModel_6 -0.17879999999999835
+PassThrough -0.14770000000000036
diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1
new file mode 100644
index 00000000..8fad3de8
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.0.1
@@ -0,0 +1,12 @@
+WordPenalty 0.080605055841244472
+LanguageModel -0.026571720531022844
+LanguageModel_OOV -0.30024999999999141
+Glue -0.26989999999999842
+PhraseModel_2 0.92000295209089566
+PhraseModel_1 0.67450748692470841
+PhraseModel_4 -0.5920000014976784
+PhraseModel_3 -0.36402437203127397
+PhraseModel_6 -0.28754999999999603
+PhraseModel_0 -0.32076244202907672
+PassThrough -0.33284999999999004
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1
new file mode 100644
index 00000000..03058a16
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1
@@ -0,0 +1,12 @@
+PhraseModel_2 0.8365578543552836
+PhraseModel_4 -0.5900840266009169
+PhraseModel_1 0.5312000609786991
+PhraseModel_0 -0.3872342271319619
+PhraseModel_3 -0.3728279676912084
+Glue -0.2938500000000036
+PhraseModel_6 -0.2803499999999967
+PassThrough -0.25014999999999626
+LanguageModel_OOV -0.21754999999999702
+LanguageModel 0.07306061161169894
+WordPenalty 0.09576193325966899
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0
new file mode 100644
index 00000000..6a6a65c1
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1.0
@@ -0,0 +1,11 @@
+WordPenalty 0.20064405063930751
+LanguageModel 0.9530439901597807
+LanguageModel_OOV -0.26400000000000112
+Glue -0.38150000000000084
+PhraseModel_0 -0.22362384322085468
+PhraseModel_1 0.12253609968953538
+PhraseModel_2 0.26328345736266612
+PhraseModel_3 0.38018406503151553
+PhraseModel_4 -0.48654149460854373
+PhraseModel_6 -0.36449999999999722
+PassThrough -0.22160000000000085
diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1
new file mode 100644
index 00000000..f56ea4a2
--- /dev/null
+++ b/training/dtrain/examples/parallelized/work/weights.1.1
@@ -0,0 +1,12 @@
+WordPenalty 0.1109188106780935
+LanguageModel 0.17269294375442074
+LanguageModel_OOV -0.13485000000000266
+Glue -0.3178000000000088
+PhraseModel_2 0.75311275661967159
+PhraseModel_1 0.38789263503268989
+PhraseModel_4 -0.58816805170415531
+PhraseModel_3 -0.38163156335114284
+PhraseModel_6 -0.27314999999999739
+PhraseModel_0 -0.45370601223484697
+PassThrough -0.16745000000000249
+PhraseModel_5 -0.026900000000000257
diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README
new file mode 100644
index 00000000..ce37d31a
--- /dev/null
+++ b/training/dtrain/examples/standard/README
@@ -0,0 +1,2 @@
+Call `dtrain` from this folder with `../../dtrain -c dtrain.ini`.
+
diff --git a/training/dtrain/examples/standard/cdec.ini b/training/dtrain/examples/standard/cdec.ini
new file mode 100644
index 00000000..e1edc68d
--- /dev/null
+++ b/training/dtrain/examples/standard/cdec.ini
@@ -0,0 +1,26 @@
+formalism=scfg
+add_pass_through_rules=true
+scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=200
+grammar=nc-wmt11.grammar.gz
+feature_function=WordPenalty
+feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
+# all currently working feature functions for translation:
+# (with those features active that were used in the ACL paper)
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+feature_function=RuleIdentityFeatures
+feature_function=RuleSourceBigramFeatures
+feature_function=RuleTargetBigramFeatures
+feature_function=RuleShape
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini
new file mode 100644
index 00000000..a05e9c29
--- /dev/null
+++ b/training/dtrain/examples/standard/dtrain.ini
@@ -0,0 +1,24 @@
+input=./nc-wmt11.de.gz
+refs=./nc-wmt11.en.gz
+output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
+select_weights=avg # output average (over epochs) weight vector
+decoder_config=./cdec.ini # config for cdec
+# weights for these features will be printed on each iteration
+print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV
+# newer versions of the grammar extractor use different feature names:
+#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+stop_after=10 # stop epoch after 10 inputs
+
+# interesting stuff
+epochs=2 # run over input 2 times
+k=100 # use 100best lists
+N=4 # optimize (approx) BLEU4
+scorer=stupid_bleu # use 'stupid' BLEU+1
+learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron)
+gamma=0 # SVM regularization strength (0 = no regularization, i.e. perceptron)
+sample_from=kbest # use kbest lists (as opposed to forest)
+filter=uniq # only unique entries in kbest (surface form)
+pair_sampling=XYX #
+hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
+pair_threshold=0 # minimum distance in BLEU (here: > 0)
+loss_margin=0
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
new file mode 100644
index 00000000..8d72f4c3
--- /dev/null
+++ b/training/dtrain/examples/standard/expected-output
@@ -0,0 +1,1206 @@
+ cdec cfg './cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading ./nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+ Example feature: Shape_S00000_T00000
+Seeding random number sequence to 1511823303
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 2
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 1
+ gamma 0
+ loss margin 0
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'avg'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg './cdec.ini'
+ input './nc-wmt11.de.gz'
+ refs './nc-wmt11.en.gz'
+ output '-'
+ stop_after 10
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ . 10
+Stopping after 10 input sentences.
+WEIGHTS
+ EgivenFCoherent = +0
+ SampleCountF = +0
+ CountEF = +0
+ MaxLexFgivenE = +0
+ MaxLexEgivenF = +0
+ IsSingletonF = +0
+ IsSingletonFE = +0
+ Glue = -576
+ WordPenalty = +417.79
+ PassThrough = -1455
+ LanguageModel = +5117.5
+ LanguageModel_OOV = -1307
+ ---
+ 1best avg score: 0.27697 (+0.27697)
+ 1best avg model score: -47918 (-47918)
+ avg # pairs: 1129.8
+ avg # rank err: 581.9
+ avg # margin viol: 0
+ non0 feature count: 703
+ avg list sz: 90.9
+ avg f count: 100.09
+(time 0.33 min, 2 s/S)
+
+Iteration #2 of 2.
+ . 10
+WEIGHTS
+ EgivenFCoherent = +0
+ SampleCountF = +0
+ CountEF = +0
+ MaxLexFgivenE = +0
+ MaxLexEgivenF = +0
+ IsSingletonF = +0
+ IsSingletonFE = +0
+ Glue = -622
+ WordPenalty = +898.56
+ PassThrough = -2578
+ LanguageModel = +8066.2
+ LanguageModel_OOV = -2590
+ ---
+ 1best avg score: 0.37119 (+0.094226)
+ 1best avg model score: -1.3174e+05 (-83822)
+ avg # pairs: 1214.9
+ avg # rank err: 584.1
+ avg # margin viol: 0
+ non0 feature count: 1115
+ avg list sz: 91.3
+ avg f count: 90.755
+(time 0.27 min, 1.6 s/S)
+
+Writing weights file to '-' ...
+R:X:NX_sein:N1_its 61.5
+WordPenalty 658.17328732437022
+LanguageModel 6591.8747593425214
+LanguageModel_OOV -1948.5
+R:X:das_NX:this_N1 12
+R:X:NX_sein_NX:N1_from_ever_being_able_to_N2 30
+R:X:NX_bemühen:N1_effort 2.5
+RBS:X_bemühen 2.5
+R:X:sich:sich -17.5
+RBT:_sich -17.5
+RBT:sich_ -17.5
+RBS:sich_X 17.5
+RBS:_als 147
+RBS:als_ -59
+Shape_S10000_T10000 -1711.5
+RBT:_when 84
+R:X:zum_NX:as_N1 -134
+RBS:_zum -30
+R:X:als_NX:as_N1 63
+R:X:zum_NX:'s_N1 33
+R:X:zum_NX:the_N1 24
+RBS:X_sich -12
+R:X:zum_NX:to_N1 -36
+R:X:zum_NX:with_the_N1 83
+R:X:NX_zum:N1_the -66
+R:X:NX_zum:N1_to 66
+R:X:als_NX:when_N1 84
+RBS:als_das 59
+RBS:X_das -104
+R:X:NX_das:N1_a 28.5
+R:X:er_sich_NX:he_N1 86.5
+RBS:er_sich 29.5
+R:X:NX_das:N1_it -6
+R:X:er_sich_NX:him_N1 -57
+RBT:_declared -488
+R:X:NX_das:N1_that -5
+RBT:declared_ -8
+R:X:NX_das:N1_the -57
+R:X:NX_das:N1_this -17
+R:X:NX_.:N1_. -323
+RBS:X_. 134
+R:X:NX_.:N1_debate_. 6.5
+R:X:NX_.:N1_disruptions_. -14.5
+R:X:NX_.:N1_established_. 7.5
+R:X:NX_.:N1_heading_. 17
+R:X:NX_.:N1_on_. 94
+R:X:NX_.:N1_pace_. 51.5
+R:X:NX_das_NX:N1_a_growing_N2 -45
+R:X:general:general -23.5
+R:X:NX_.:N1_politics_. 84
+R:X:NX_das_NX:N1_a_N2 -0.5
+R:X:NX_.:N1_power_. -99.5
+RBS:general_ -23.5
+R:X:NX_.:N1_-_range_missiles_. -28.5
+Shape_S11000_T11000 40
+RBT:general_ -23.5
+RBT:_. -645
+R:X:betrat:entered -91
+R:X:NX_.:N1_war_. 68.5
+RBS:_betrat 23.5
+Shape_S11000_T01100 475.5
+RBT:_entered -91
+RBT:entered_ -91
+R:X:NX_das_NX:N1_the_N2 -2
+R:X:betrat:betrat 114.5
+RBT:_betrat 114.5
+RBT:betrat_ 114.5
+R:X:12:12 79
+R:X:maßnahmen:action 24
+R:X:.:. -566
+RBS:12_ 79
+RBS:_maßnahmen -44.5
+RBS:_. -645
+RBT:._ -566
+RBT:_action 24
+RBT:12_ 79
+RBT:action_ 24
+R:X:maßnahmen:actions -13
+RBT:_actions -13
+RBT:actions_ -13
+R:X:12_NX:12_N1 -79
+RBT:declared_a -428
+RBS:12_X -79
+RBT:a_state -428
+RBT:state_of -428
+R:X:maßnahmen:maßnahmen -55.5
+R:X:internationale_NX:global_N1 -270
+RBS:X_am 316.5
+RBT:_maßnahmen -55.5
+RBS:am_ 267.5
+RBT:maßnahmen_ -55.5
+RBS:_den 883
+R:X:internationale_NX:international_N1 270
+RBS:den_X -286.5
+R:X:NX_am:N1_of 267.5
+R:X:NX_als:N1_a -273.5
+RBS:am_X -281
+R:X:den_NX:'s_N1 -31
+R:X:NX_am_NX:N1_of_N2 -30
+R:X:NX_am_NX:N1_on_N2 79
+R:X:NX_als:N1_'s 273.5
+R:X:NX_betrat:N1_entered -23.5
+R:X:ins_NX:into_the_N1 -32.5
+RBS:X_betrat -23.5
+RBT:into_the -55
+R:X:ins_NX:into_N1 32.5
+RBT:_their 303
+R:X:general_NX:general_N1 23.5
+RBS:general_X 23.5
+RBS:_am -316.5
+R:X:den_NX:the_N1 89
+R:X:den_NX_.:the_N1_. 86.5
+R:X:NX_und:and_N1 -216
+RBS:X_und -203.5
+RBS:und_ 522.5
+RBT:_and 438.5
+R:X:am_NX:at_N1 23
+R:X:NX_als_das:N1_than_the 59
+R:X:NX_und:N1_- -114
+R:X:NX_und:N1_, 114
+R:X:am_NX:of_N1 -4
+R:X:am_NX:on_N1 -158.5
+R:X:am_NX:the_N1 -190
+RBS:_seine -16.5
+RBS:seine_ 39
+R:X:oktober:october -79.5
+R:X:seine:his -5.5
+RBS:_oktober -79.5
+R:X:seine:its 50
+RBT:_october -79.5
+RBT:october_ -79.5
+R:X:seine_NX:a_N1 7.5
+RBS:seine_X -39
+R:X:NX_und_NX:and_N1_N2 -22
+RBS:und_X 160.5
+R:X:seine_NX:his_N1 -97
+R:X:seine_NX:its_N1 102.5
+R:X:NX_und_NX:N1_,_and_N2 -4
+R:X:NX_maßnahmen:N1_actions 44.5
+RBS:X_maßnahmen 44.5
+R:X:seine_NX_als:his_N1_than 5.5
+R:X:seine_NX_als:its_N1_as -64.5
+R:X:NX_und_NX:N1_,_N2 -7
+Shape_S01100_T11000 -312.5
+RBS:und_den -822.5
+Shape_S01100_T01100 -537.5
+Shape_S01100_T11100 15
+R:X:NX_seine:'s_N1 -5.5
+RBS:X_seine 16.5
+RBS:X_den -38
+R:X:amerika_NX_sich_NX:america_N1_N2 -12
+R:X:NX_seine_NX:'s_N1_N2 22
+R:X:auf_NX_den_NX:to_N1_the_N2 -23
+R:X:auf_NX_den_NX:to_N1_N2 -23
+RBS:_unterstützen -716
+RBS:unterstützen_ -1
+Shape_S11100_T11000 783.5
+Shape_S11100_T01100 -716
+Shape_S11100_T11100 488
+R:X:unterstützen:unterstützen -1
+RBT:_unterstützen -1
+RBT:unterstützen_ -1
+R:X:unterstützen_NX:support_N1 -715
+RBS:unterstützen_X -715
+RBT:_will -6
+RBS:X_unterstützen 716
+RBT:_if 35
+R:X:NX_den_NX_.:N1_N2_. 41
+R:X:verfassung:constitution 15
+RBS:_verfassung -43
+RBT:_constitution 15
+RBT:constitution_ 15
+R:X:verfassung:constitutional 9.5
+RBT:_constitutional 9.5
+RBS:unterstützen_. 716
+RBT:constitutional_ 9.5
+R:X:NX_unterstützen_.:N1_. 716
+R:X:verfassung:verfassung -67.5
+R:X:eine_NX:an_N1 162
+RBT:_verfassung -67.5
+RBT:verfassung_ -67.5
+R:X:und:, -21.5
+R:X:,_NX_zu_NX:to_N2_N1 -153
+RBS:_und -389.5
+R:X:und:and -35
+RBS:angeführten_ -716
+RBT:and_ -35
+RBT:_as 63
+RBS:versucht_ 68
+R:X:und:with -3
+R:X:eine_NX:is_N1 -162
+RBS:angeführten_X 716
+R:X:und:und 91
+RBT:_und 91
+RBT:und_ 91
+R:X:versucht:tried 68
+RBT:tried_ 68
+RBS:versucht_X -68
+R:X:versucht_NX:tried_N1 -68
+R:X:und_NX:and_N1 250
+R:X:und_NX:with_N1 -18
+R:X:und_NX:,_N1 -7
+R:X:und_NX:N1_and -12
+R:X:und_den_NX:and_N1 -716
+R:X:er:he 17
+R:X:NX_eine:N1_is -7
+RBS:_er -47.5
+RBS:er_ 54
+RBT:_he 485.5
+RBT:he_ 17
+RBT:_him -1
+R:X:und_NX_.:,_N1_. -3
+R:X:er:his 91
+R:X:und_den_NX_.:and_the_N1_. 88
+R:X:NX_eine:N1_will 7
+R:X:er:it 3
+R:X:und_den_NX_.:and_N1_. -216.5
+R:X:er:er -196
+RBT:_er -196
+RBT:er_ -196
+RBS:er_X 8
+R:X:er_NX:he_N1 399
+R:X:er_NX:it_N1 -379
+Shape_S01010_T01010 -599
+RBS:pakistanischen_ 43
+R:X:NX_versucht:N1_tried 196
+RBT:_pakistan -43
+RBT:_pakistani 2
+R:X:er_NX_,_NX:he_N1_N2 -12
+R:X:NX_hat_er:N1_,_he_has 196
+RBS:hat_er 196
+R:X:NX_er:he_N1 -17
+RBS:X_er -148.5
+RBS:pakistanischen_X -43
+R:X:NX_er:it_N1 -7
+RBS:X_verfassung 43
+R:X:NX_verfassung:N1_'s_constitution 43
+R:X:NX_hat_NX_versucht:N1_N2_has_tried -190
+R:X:NX_hat_NX_versucht:N1_,_N2_has_tried -6
+RBS:der_pakistanischen 43
+RBS:X_pakistanischen -43
+RBS:_aber 46
+RBS:,_als -147
+RBT:_but -321
+R:X:aber_NX:but_N1 46
+R:X:von_NX_angeführten:N1_-_led -716
+R:X:von_NX_angeführten_NX:N1_-_led_N2 716
+RBS:,_aber -114
+RBS:X_aber 68
+R:X:,_als_NX:,_as_N1 -40
+R:X:NX_aber_NX_,:N1_N2_to 68
+R:X:NX_pakistanischen_NX_.:pakistan_N1_N2_. -43
+R:X:NX_,_aber_NX:N1_,_N2 -114
+RBS:_rahmen 43
+RBS:rahmen_ 43
+R:X:rahmen:within 20
+R:X:rahmen:rahmen 23
+RBT:_rahmen 23
+RBT:rahmen_ 23
+Shape_S01110_T11010 35.5
+R:X:NX_der_pakistanischen:N1_pakistan 43
+Shape_S01110_T01110 -1195
+Shape_S01110_T11110 -6.5
+R:X:NX_,_NX_er:N1_N2_he -33
+RBS:geben_X -577.5
+RBS:_gestalten 196
+Shape_S01110_T01011 278
+RBS:gestalten_ 196
+RBS:geben_und 577.5
+R:X:gestalten:more 221
+Shape_S01110_T01111 -181.5
+RBT:_more 221
+RBT:more_ 221
+R:X:gestalten:gestalten -25
+RBT:_gestalten -25
+RBT:gestalten_ -25
+R:X:effektiver:effectively -151
+RBS:_effektiver 54
+RBS:effektiver_ -221
+RBT:_effectively -151
+RBT:effectively_ -151
+R:X:effektiver:effektiver -99
+RBT:_effektiver -99
+RBT:effektiver_ -99
+Shape_S11110_T11010 -1130
+RBS:zu_geben -107.5
+R:X:effektiver_zu_NX:N1_effectively 304
+RBS:effektiver_zu 221
+RBS:X_geben 107.5
+Shape_S11110_T01110 621
+Shape_S11110_T11110 -75
+RBS:X_gestalten -196
+R:X:NX_gestalten_.:N1_. -196
+RBS:gestalten_. -196
+R:X:terror:terror 672
+RBS:_terror -16
+RBS:terror_ 640
+R:X:den:- -4
+RBT:_terror 136
+RBT:terror_ 646
+RBS:den_ 42.5
+R:X:den:for -11.5
+R:X:terror:terrorism -54
+RBT:_terrorism -54
+Shape_S11110_T11011 -4.5
+RBT:terrorism_ -54
+R:X:terror_NX:terror_N1 -634
+R:X:den:of -17
+RBS:terror_X -640
+R:X:den:'s 32.5
+Shape_S11110_T01111 -1.5
+R:X:NX_effektiver:N1_more_effectively 29
+RBS:X_effektiver -54
+R:X:den:the 68
+R:X:NX_geben_und:N1_and 107.5
+R:X:NX_effektiver_zu_NX:N1_N2_effectively -83
+R:X:den:to -33
+RBS:1999_ -302.5
+R:X:,_NX_zu_geben_NX:to_N1_N2 -577.5
+R:X:den:with -10
+RBS:X_terror -4.5
+R:X:,_NX_zu_geben_und:to_N1_and 470
+R:X:NX_1999:N1_1999 -302.5
+R:X:NX_1999_NX:N2_N1_1999 302.5
+RBS:1999_X 302.5
+R:X:den_NX_zu:to_N1 783.5
+R:X:NX_rahmen_der:N1_the -43
+RBS:X_rahmen -43
+RBS:rahmen_der -43
+RBS:gegen_ 22.5
+R:X:gegen:against -2
+RBT:_against -2
+RBT:against_ -2
+R:X:._NX:._N1 -79
+RBS:._X -79.5
+RBS:gegen_den -22.5
+R:X:NX_._oktober:october_N1 79.5
+RBS:._oktober 79.5
+R:X:am_NX_._NX:the_N2_N1 -0.5
+R:X:gegen_den_NX:on_N1 2
+RBS:den_terror 20.5
+RBT:on_terror -26
+R:X:NX_den_terror:the_N1_terror 29
+R:X:den_NX_den_NX:the_N1_N2 -110.5
+R:X:den_NX_den_NX:N2_the_N1 -95
+RBT:_the -1.5
+R:X:krieg:war -4.5
+RBS:_krieg -22
+R:X:musharraf:musharraf 43
+RBS:krieg_ -4.5
+RBT:_war -22
+RBS:_musharraf 66.5
+RBS:musharraf_ -23.5
+RBT:war_ -4.5
+R:X:musharraf_NX:musharraf_imposed_N1 23.5
+RBS:musharraf_X 23.5
+RBT:musharraf_imposed 23.5
+RBS:krieg_gegen 4.5
+R:X:musharraf_NX:musharraf_N1 107
+R:X:krieg_gegen:war_on 24.5
+RBT:war_on -17.5
+RBS:X_gegen -4.5
+R:X:musharraf_NX_,_als_NX:musharraf_N1_as_N2 -20
+R:X:musharraf_NX_,_als_NX:musharraf_N1_N2 -87
+R:X:krieg_gegen_den_NX:war_on_N1 -16
+R:X:krieg_gegen_den_terror:war_on_terror -26
+R:X:pervez:pervez 22
+RBS:_pervez 22
+RBS:pervez_ 57.5
+RBS:X_krieg 22
+RBT:_pervez 22
+RBT:pervez_ 22
+RBS:pervez_musharraf -57.5
+RBS:X_musharraf -9
+R:X:NX_musharraf:N1_musharraf -9
+R:X:den_NX_gegen_den:the_N1_on -4.5
+R:X:den_NX_den_terror:the_N1_terror -3
+R:X:NX_krieg_gegen_den_terror:N1_war_on_terror 22
+R:X:den_NX_den_terror_NX:N2_the_N1_terror -1.5
+RBT:_project 91
+RBS:hat_ 2
+RBS:X_- 14
+R:X:NX_-:,_N1 48.5
+R:X:NX_-:N1_months_of 32
+R:X:NX_-:N1_relief_and 64
+R:X:NX_-:N1_'s -144.5
+RBS:hat_X -198
+R:X:und_NX_terror_NX:and_N2_N1_terror -4.5
+RBT:and_ -4.5
+R:X:sorgen:bring -19
+RBS:X_pervez -22
+RBT:_bring -19
+RBT:bring_ -19
+R:X:sorgen:ensure 19
+RBT:_ensure 19
+RBT:ensure_ 19
+R:X:NX_-_NX:N1_N2_security -4
+R:X:NX_projekt_NX:N2_N1_project -156
+R:X:NX_-_NX_.:N1_N2_. 18
+R:X:NX_projekt_NX_.:N2_N1_project_. 156
+RBS:_- -14
+RBT:to_ensure 0.5
+R:X:NX_hat:has_N1 -5
+R:X:NX_hat:N1_, 3
+R:X:NX_hat:,_N1 21.5
+R:X:NX_hat:N1_has -17
+R:X:NX_hat:N1_is -0.5
+R:X:-_NX:of_N1 -26
+R:X:-_NX:'s_N1 -58
+R:X:NX_hat_NX:N1_,_N2 -73
+R:X:NX_hat_NX:N1_N2_has 28
+R:X:-_NX:-_N1 122
+R:X:NX_hat_NX:N1_,_N2_has 21
+R:X:-_NX:--_N1 -21
+R:X:-_NX:,_N1 -31
+R:X:stabilität:stability -118
+RBS:_stabilität -129
+RBT:_stability -118
+RBT:stability_ -118
+R:X:stabilität:stabilität -11
+RBT:_stabilität -11
+RBT:stabilität_ -11
+RBT:_country 253
+RBS:_für 101
+RBS:für_ 129
+RBS:X_ihres -16
+R:X:NX_ihres_NX:N1_of_their_N2 -16
+R:X:für:that 129
+RBT:_political -16
+RBS:für_X -129
+R:X:,_NX_und_NX:,_N1_N2 -2
+R:X:für_NX:to_N1 -28
+R:X:NX_stabilität:N1_stability 129
+RBS:X_stabilität 129
+RBS:X_für 22
+RBT:_with -109
+RBS:,_für -123
+R:X:,_für_NX:,_N1 15.5
+R:X:,_NX_den_NX_zu:to_N2_N1 69
+R:X:NX_für_NX_.:N1_N2_. 22
+RBS:_ihres 16
+R:X:ihres_NX:its_N1 -50
+R:X:ihres_NX:their_N1 66
+R:X:NX_zu_verkaufen_NX:sell_N1_N2 140.5
+RBS:verkaufen_X 140.5
+RBS:_würde -204
+RBS:würde_ -117
+R:X:würde:would -204
+RBS:würde_X 126
+R:X:in_NX_hat_NX:in_N1_N2 22
+R:X:NX_dem_NX_pervez:N1_N2_pervez 35.5
+RBS:_halten 284
+RBS:halten_ 204
+R:X:NX_dem_NX_pervez_musharraf:N1_N2_pervez_musharraf -57.5
+Shape_S01111_T01011 560.5
+Shape_S01111_T11011 -20.5
+Shape_S01111_T01111 -5
+RBT:_maintain 30
+R:X:halten:halten 284
+RBT:_halten 284
+RBT:halten_ 284
+RBS:halten_X -204
+R:X:NX_würde:if_N1 35
+RBS:X_würde 204
+R:X:NX_würde:will_N1 -6
+Shape_S11111_T11010 69
+R:X:NX_würde:would_face_a_N1 -9.5
+RBT:would_face -18.5
+RBT:face_a -18.5
+Shape_S11111_T11110 -57
+R:X:NX_würde:would_N1 78
+R:X:NX_würde:N1_will -10.5
+R:X:NX_würde_NX:would_N1_N2 126
+R:X:NX_würde_.:would_face_a_N1_. -9
+RBS:würde_. -9
+PhraseModel_0 -2973.8953021225416
+R:X:vielleicht:may -177
+PhraseModel_1 -4012.0052074229625
+PhraseModel_2 -1203.5725821427027
+RBS:vielleicht_ -284
+PhraseModel_3 2747.8420998127522
+PhraseModel_4 -3205.3163436680484
+PhraseModel_5 720.5
+PhraseModel_6 275
+R:X:vielleicht:vielleicht -107
+RBT:_vielleicht -107
+RBT:vielleicht_ -107
+R:X:vielleicht_NX:perhaps_N1 284
+RBS:vielleicht_X 284
+R:X:NX_halten:maintain_the_N1 -29
+RBS:X_halten -284
+RBT:maintain_the -174
+R:X:NX_halten:N1_hold -51
+R:X:NX_halten_NX:N2_maintain_the_N1 -204
+RBT:_maintain -204
+RBS:_versprechen 30
+RBS:versprechen_ -75
+RBT:_commitment 107
+R:X:versprechen_NX:commitment_N1 30
+RBS:versprechen_X 75
+R:X:NX_versprechen:N1_commitment -75
+RBS:X_versprechen -30
+R:X:NX_,_für_NX:N1_,_N2 -138.5
+R:X:NX_versprechen_NX:N1_commitment_N2 45
+RBS:_dass -451
+RBS:dass_ -91.5
+R:X:dass_NX:that_N1 -451
+RBS:dass_X 91.5
+R:X:NX_er_sein:N1_to_make_up_for_his -91.5
+RBS:er_sein -91.5
+R:X:seine_NX_und:a_N1_, -15
+R:X:NX_,_NX_und:N1_N2_, 129
+RBS:,_dass 851.5
+R:X:NX_,_dass:N1_keep -27
+R:X:NX_,_dass:N1_said_that -0.5
+R:X:NX_,_dass:N1_to_let -9.5
+R:X:NX_dass:that_N1 -8.5
+RBS:X_dass -400.5
+R:X:NX_dass:N1_let -51.5
+R:X:NX_dass:N1_see -243.5
+R:X:NX_dass:N1_thought -97
+R:X:NX_,_dass_NX:N1_that_N2 134
+Glue -599
+PassThrough -2016.5
+R:X:musharrafs:his 2
+RBS:musharrafs_ -29
+R:X:NX_und_den:N1_and_the 22
+RBT:_his 250.5
+RBT:his_ 160.5
+R:X:musharrafs:musharraf -1.5
+RBT:_musharraf 135.5
+RBT:musharraf_ 41.5
+R:X:NX_,_dass_NX_.:N1_N2_. 91.5
+R:X:musharrafs:musharrafs -29.5
+RBT:_musharrafs -29.5
+RBT:musharrafs_ -29.5
+RBS:sie_X 346
+RBS:_X -1369.5
+R:X:dies:so -74.5
+RBS:X_ -1743
+RBS:dies_ -348
+R:X:dies:so_,_this 47
+RBT:so_, 47
+R:X:sie_NX:it_N1 22
+RBT:,_this 47
+R:X:dies:that -256.5
+R:X:NX_?:N1_? -134.5
+R:X:dies:these -5.5
+RBS:X_? -235
+RBT:_these -5.5
+RBT:these_ -5.5
+R:X:NX_?:N1_consulting_? -100.5
+R:X:dies:this -58.5
+R:X:letzter_NX:last_N1 -14
+RBS:_letzter -20
+RBS:letzter_X 19.5
+RBT:_last -2
+R:X:letzter:last 7
+RBS:letzter_ -19.5
+R:X:sein:be 1.5
+RBT:last_ 7
+R:X:letzter:late 11.5
+RBT:_they -6
+RBS:sein_ 68
+RBT:_late 11.5
+R:X:ist_NX:be_N1 464.5
+RBT:_be -10.5
+RBT:late_ 11.5
+R:X:sie_NX:they_N1 -22
+RBS:_ist 415.5
+RBT:be_ 120
+R:X:letzter:letzter -24.5
+RBS:ist_X 8
+R:X:sein:being -16
+RBT:_letzter -24.5
+R:X:ist_NX:has_N1 16
+RBT:_being -79
+RBT:letzter_ -24.5
+R:X:ist_NX:is_at_N1 6
+RBT:being_ -16
+R:X:musharrafs_NX:his_N1 -25
+R:X:sein:his 73
+RBS:musharrafs_X 29
+R:X:ist_NX:is_well_N1 6
+R:X:sein:its -15.5
+R:X:musharrafs_NX:musharraf_'s_N1 77.5
+R:X:sein:sein 55
+RBT:musharraf_'s 55.5
+R:X:ist_NX:is_N1 23
+RBT:_sein 55
+R:X:musharrafs_NX:musharraf_N1 -23.5
+R:X:ist_NX:more_N1 -130.5
+RBT:sein_ 55
+R:X:NX_letzter:N1_late -26.5
+R:X:ist_NX:N1_be 176
+R:X:ziel:aim -32.5
+RBS:X_letzter 20
+R:X:ist_NX:N1_has -67
+RBS:_ziel -143
+R:X:NX_letzter:N1_'s_last 13
+R:X:ist_NX:N1_is -19
+RBS:ziel_ -219
+R:S:NS_NX:N1_N2 -599
+R:X:ist_NX:N1_,_is 18
+RBT:_aim -32.5
+RBS:_S -599
+R:X:ist_NX:N1_it_is 49
+RBT:aim_ -32.5
+RBS:S_X -599
+R:X:ist:are -65.5
+R:X:ziel:goal 45
+R:X:NX_letzter_NX:N1_'s_last_N2 33.5
+RBS:ist_ -8
+RBT:_goal 45
+R:X:?:? 235
+RBT:goal_ 45
+RBS:_? 235
+R:X:ziel:target -22.5
+RBT:_? 235
+RBS:X__ -347
+RBT:_target -22.5
+RBT:?_ 235
+RBT:target_ -22.5
+R:X:ist:'s -61
+R:X:ziel:targets -18
+RBS:in_ -22
+RBT:_targets -18
+RBT:targets_ -18
+RBT:_, 24.5
+R:X:ziel:ziel -125
+RBT:,_ -38
+R:X:NX___NX:N1___N2 -347
+R:X:dies_NX:so_N1 200
+RBT:_ziel -125
+RBS:dies_X 256
+RBT:ziel_ -125
+RBT:_at 23
+R:X:dies_NX:this_to_N1 156.5
+R:X:ziel_NX:goal_N1 49
+RBT:this_to 156.5
+RBS:ziel_X 219
+R:X:dies_NX:this_N1 -100.5
+R:X:ziel_NX:targets_N1 -19
+R:X:dies_ist:could_be 118.5
+R:X:ziel_NX:target_N1 -20
+RBS:dies_ist 92
+R:X:sein_NX:being_able_to_N1 -71.5
+RBT:in_ -65.5
+R:X:in:for 31
+RBT:_could 118.5
+RBS:sein_X -68
+RBT:could_be 118.5
+RBT:being_able -63
+RBT:_for 14.5
+RBT:able_to -63
+RBT:for_ 14.5
+R:X:sein_NX:be_N1 -10
+R:X:sein_NX:his_N1 184.5
+RBS:X_ist -507.5
+R:X:sein_NX:its_N1 -26.5
+R:X:in:in -53
+R:X:sein_NX:N1_be -174.5
+R:X:NX_ziel:N1_aim -32.5
+RBT:_in -75.5
+RBS:X_ziel 143
+R:X:NX_ziel:N1_goal 20
+R:X:NX_ziel:N1_target -26.5
+R:X:NX_ziel:N1_targets -27
+RBT:_into -270
+R:X:NX_ziel_NX:N1_goal_N2 60
+R:X:NX_ziel_NX:N1_targets_N2 -6
+R:X:NX_sie_NX_,_dass:N1_N2_that 346
+R:X:NX_ziel_NX:N1_target_N2 -6
+R:X:dies_ist_NX:this_is_N1 -26.5
+R:X:NX_ziel_NX:N2_N1_goal 161
+RBT:_of -38
+RBT:of_ -17
+R:X:NX_ist_NX:is_N1_N2 -129
+RBS:_die 428.5
+R:X:NX_ist_NX:is_N1_,_N2 16.5
+RBS:die_ -116
+RBT:_on -653.5
+RBT:on_ 84.5
+R:X:NX_ist_NX:'s_N1_N2 -41.5
+R:X:die:, -9
+RBT:_over 45
+R:X:die:a -5
+R:X:NX_ist_NX:N1_has_N2 -104.5
+R:X:blieben_NX:remained_N1 135
+R:X:die:an -123
+R:X:NX_ist_NX:N1_is_at_N2 -5.5
+RBS:_blieben 187.5
+R:X:NX_ist_NX:N1_is_well_N2 -5
+RBS:blieben_X -13
+RBT:_are -65.5
+RBT:_'s 16
+R:X:NX_ist_NX:N1_is_N2 -31
+RBT:are_ -65.5
+RBT:'s_ -28.5
+R:X:blieben_NX:N1_remained 81.5
+R:X:NX_ist_NX:N1_,_is_N2 59.5
+R:X:die:by -10
+R:X:die:its 302.5
+RBS:_pakistanis 57
+RBS:pakistanis_ 116.5
+RBT:_to 93.5
+RBT:_pakistanis 161
+R:X:NX_ist_NX:N1_N2_has -75
+R:X:die:the -28
+RBT:to_ 18
+R:X:NX_ist_NX:N1_N2_is -97.5
+R:X:pakistanis_NX:pakistanis_N1 57
+R:X:NX_ist_NX:N1_,_N2_is -1
+RBT:_those -6
+RBT:_within 20
+RBT:within_ 20
+RBS:pakistanis_X -116.5
+R:X:NX_blieben_NX:N1_,_N2_remained -229.5
+R:X:NX_ist_NX:N2_is_N1 -47
+RBS:X_blieben -187.5
+RBT:_is -21
+R:X:NX_pakistanis:pakistanis_,_N1 235.5
+RBS:X_pakistanis -57
+RBT:pakistanis_, 104
+R:X:NX_pakistanis:N1_pakistanis -119
+R:X:NX_ist_NX:N2_N1_is -46.5
+RBS:blieben_ 13
+RBT:_is -251
+R:X:blieben:blieben -29
+RBT:_blieben -29
+RBT:blieben_ -29
+R:X:NX_pakistanis_NX:pakistanis_,_N1_,_N2 -23
+RBS:_zu -560
+R:X:NX_pakistanis_NX:N1_pakistanis_N2 -150.5
+RBS:zu_X -717.5
+R:X:NX_blieben:N1_,_remained 42
+RBS:__ 347
+RBS:_ein 37.5
+RBS:ein_ -9.5
+RBS:der_ -88.5
+R:X:zu_NX:for_N1 43
+R:X:__NX:__N1 -97
+RBT:_- 113
+RBT:-_ -4
+R:X:__NX:,_N1 444
+R:X:zu_NX:in_N1 37.5
+RBT:_a -27.5
+RBT:a_ -5
+RBS:sie_ -346
+RBT:the_ 40
+R:X:zu_NX:to_N1 -716
+R:X:zu_NX:with_N1 40.5
+R:X:zu_NX:N1_on 30
+RBT:_the 324.5
+R:X:NX_sie:but_N1 -346
+RBS:X_ein -37.5
+RBT:be_transformed -12
+R:X:medien:media 299.5
+RBS:_medien -71.5
+RBT:_with 54.5
+RBS:medien_ -156
+RBT:with_ -19
+RBT:_media 299.5
+R:X:NX_ein:N1_has_an -3.5
+RBT:media_ 299.5
+R:X:NX_ein:N1_put_forward_a -6
+R:X:medien:medien -371
+RBT:_medien -371
+RBT:medien_ -371
+RBS:der_X 45
+RBS:medien_X 156
+R:X:NX_zu_NX:in_N2_N1 -9.5
+RBS:X_zu 339
+RBT:in_ -2.5
+R:X:NX_zu_NX:of_N2_N1 -52.5
+RBT:to_ -102.5
+RBT:_to 30
+R:X:,_dass_NX:that_N1 317
+R:X:NX_zu_NX:to_N2_N1 19
+R:X:NX_zu_NX:N1_in_N2 -2
+R:X:NX_zu_NX:N1_is_N2 -2
+RBS:X_macht -0.5
+R:X:NX_zu_NX:N1_to_N2 48
+R:X:NX_macht_NX:N1_N2_does -0.5
+R:X:NX_zu_NX:N2_N1_to -28
+R:X:NX_zu_NX_.:to_N2_N1_. 22.5
+RBS:an_ 28
+R:X:NX_zu_NX_.:N1_is_N2_. -3.5
+R:X:NX_zu_NX_.:N1_to_N2_. 7.5
+R:X:NX_zu_NX_.:N1_with_N2_. -3
+R:X:NX_zu_NX_.:N1_N2_. -221.5
+R:X:NX_zu_NX_.:N2_N1_. 4.5
+R:X:freien:free -83.5
+RBS:_freien -118
+RBS:freien_ -201.5
+RBT:_free 210
+RBT:free_ -83.5
+R:X:freien:freien -276
+RBT:_freien -276
+RBT:freien_ -276
+RBT:_an 31.5
+R:X:freien_NX:free_N1 248
+RBT:an_ -123
+RBS:freien_X 201.5
+R:X:NX_medien:N1_media -90
+RBS:X_medien 71.5
+R:X:amerika:america 193
+RBS:_amerika -36
+R:X:NX_medien_NX:N2_N1_media 5
+R:X:an_NX:in_N1 210
+R:X:freien_NX_.:free_N1_. -6.5
+RBS:amerika_ -131
+R:X:NX_medien_NX_.:N2_N1_media_. 151
+RBT:_america 283.5
+RBT:america_ 193
+R:X:die_NX:an_N1 -7.5
+R:X:amerika:american -3
+RBS:die_X -45.5
+RBT:_american -3
+RBT:american_ -3
+R:X:amerika:amerika -321
+RBS:_jener 62.5
+R:X:die_NX:a_N1 19
+RBT:_amerika -321
+RBS:jener_X 62.5
+RBT:amerika_ -321
+R:X:jener_NX:the_N1 62.5
+R:X:an_NX:to_N1 -210
+RBS:X_jener -62.5
+RBS:amerika_X 131
+R:X:amerika_NX:america_N1 107
+R:X:die_NX:is_N1 -2.5
+RBS:an_der -28
+R:X:auf:, -5
+R:X:die_NX:its_N1 -14
+RBS:auf_ 46.5
+R:X:die_NX:'s_N1 46.5
+RBS:X_der 71
+R:X:NX_der:N1_for -74
+R:X:NX_der:N1_in -43
+R:X:auf:in -5.5
+RBT:_choice -103
+R:X:die_NX:the_N1 -86.5
+RBT:_decision 103
+R:X:auf:on 60
+R:X:die_NX:those_N1 -6
+R:X:NX_der:N1_to 72
+R:X:entscheidung_NX:choice_is_N1 -103
+R:X:die_NX:with_N1 73.5
+R:X:auf:auf -3
+RBT:choice_is -103
+RBT:_auf -3
+R:X:entscheidung_NX:decision_N1 103
+R:X:die_NX:,_N1 57
+R:X:die_NX:N1_is -0.5
+RBT:auf_ -3
+R:X:die_NX:N1_'s -1
+RBS:auf_X -46.5
+R:X:die_NX:N1_the -1
+R:X:NX_freien:N1_free 158
+RBT:of_ -13
+RBS:X_freien 118
+R:X:NX_der_NX:over_N2_N1 45
+R:X:NX_freien_NX:N1_free_N2 -34
+R:X:NX_freien_NX:N1_free_,_N2 -6
+RBT:over_ 45
+R:X:die_NX_medien:the_N1_media 5.5
+R:X:auf_NX:in_N1 -46.5
+RBT:the_ -0.5
+R:X:auf_NX:on_N1 66
+R:X:auf_NX:to_N1 -2
+R:X:auf_NX:,_N1 -18
+RBS:X_amerika 36
+RBT:_may -177
+RBS:und_die 139.5
+RBT:may_ -177
+RBT:_ 585.5
+RBT:_would -18.5
+RBS:X_die -568
+RBT:would_ -204
+R:X:NX_die:the_N1 34.5
+R:X:NX_amerika_NX:N2_N1_america 36
+R:X:terroranschläge:terrorist -22
+R:X:NX_die:,_N1 -42
+R:X:NX_die:N1_, -173
+RBS:_terroranschläge -161.5
+RBS:der_macht 0.5
+R:X:NX_die:-_N1 -5
+RBS:terroranschläge_ -46
+R:X:NX_die:N1_a -1
+RBT:_terrorist -119.5
+R:X:NX_der_macht_NX:N1_hold_N2_power 28
+RBT:terrorist_ -22
+R:X:,:, -2.5
+RBT:terrorist_attacks 77.5
+RBS:_, -182
+RBT:attacks_ 28
+RBS:,_ -160.5
+R:X:terroranschläge:terroranschläge -52
+RBT:_terroranschläge -52
+RBT:__ -139
+RBT:terroranschläge_ -52
+R:X:NX_die:N1_its -128.5
+RBS:terroranschläge_X 46
+RBT:_-- -64
+R:X:terroranschläge_NX:terrorist_attacks_N1 -87.5
+RBT:_by -10
+RBT:by_ -10
+R:X:,:out -3.5
+RBT:_out -3.5
+R:X:und_die_NX:and_N1 218
+RBT:out_ -3.5
+RBT:_that -261.5
+R:X:NX_die_NX:the_N1_N2 -1
+RBT:that_ -127.5
+R:X:NX_die_NX:the_N2_N1 -4
+RBS:,_X -335
+RBT:,_as -40
+R:X:,_NX:in_N1 -239
+R:X:,_NX:of_N1 -4
+R:X:,_NX:on_N1 -166
+R:X:,_NX:to_N1 649
+R:X:NX_die_NX:N1_the_N2 -4
+R:X:,_NX:,_N1 -399
+R:X:,_NX:__N1 -42
+R:X:,_NX:--_N1 -102
+R:X:,_an:to 28
+RBS:,_an 28
+R:X:NX_die_NX:N1_,_N2 -5
+R:X:NX_die_NX:N1_N2_the -4
+RBS:X_an -28
+RBS:die_terroranschläge 161.5
+R:X:die_terroranschläge:,_terrorist_attacks 28
+RBT:,_terrorist 175
+R:X:die_terroranschläge_NX:,_terrorist_attacks_N1 147
+R:X:NX_so:N1_as -1.5
+R:X:justiz:judiciary -90
+RBS:_justiz -1
+RBS:justiz_ -220.5
+R:X:NX_so:N1_that -14
+RBT:_judiciary 215
+R:X:NX_so:N1_the 15.5
+RBT:judiciary_ -90
+R:X:justiz:justiz -216
+RBT:_justiz -216
+RBT:justiz_ -216
+R:X:justiz_NX:judiciary_N1 305
+RBS:justiz_X 205
+RBS:_brachten -28
+RBS:justiz_und 15.5
+RBS:brachten_ -175
+R:X:NX_und_die:'s_N1_and -5
+R:X:brachten:brachten -175
+RBT:_brachten -175
+RBT:brachten_ -175
+R:X:NX_an_der:N1_the -0.5
+R:X:brachten_NX:N1_brought 147
+RBS:brachten_X 175
+R:X:NX_die_terroranschläge_NX:,_terrorist_attacks_N2_N1 -13.5
+R:X:NX_und_die:N1_'s -12
+R:X:NX_und_die_NX:'s_N2_N1 -16
+RBS:_2001 -14.5
+RBS:2001_ 28
+RBT:_2001 37.5
+R:X:NX_und_die_NX:N1_and_N2 -159
+RBT:2001_ 28
+R:X:2001_NX:2001_N1 147
+RBS:2001_X -28
+R:X:NX_brachten_NX:N1_N2_brought 28
+RBS:X_brachten 28
+RBT:,_ -109.5
+R:X:2001_NX_die_NX:2001_,_N2_N1 -161.5
+R:X:unabhängige:independent 38
+RBT:2001_, -109.5
+RBS:_unabhängige 127
+RBS:unabhängige_ -197
+RBT:_independent 343
+RBT:independent_ 38
+RBT:_september -13.5
+R:X:unabhängige:unabhängige -198
+RBT:_unabhängige -198
+RBS:ein_X 9.5
+RBT:unabhängige_ -198
+RBS:september_X -14.5
+R:X:unabhängige_NX:independent_N1 287
+R:X:ein_NX:an_N1 132
+R:X:ein_NX:any_N1 25
+RBS:unabhängige_X 197
+R:X:NX_justiz:N1_judiciary 85.5
+R:X:NX_an_der_macht_NX:N1_of_power_N2 -27.5
+RBS:X_justiz 1
+R:X:NX_justiz_NX:N1_judiciary_N2 -43
+R:X:NX_justiz_und:N1_judiciary_and 15.5
+RBS:_11 -13.5
+R:X:NX_unabhängige:N1_independent -37
+R:X:ein_NX:a_N1 -93
+RBS:X_unabhängige -127
+R:X:ein_NX:one_N1 -15
+R:X:NX_unabhängige_NX:N1_independent_N2 -90
+R:X:ein_NX:-_N1 -11.5
+R:X:NX_ein_NX:an_N1_N2 -6
+R:X:NX_ein_NX:be_transformed_N1_N2 -22
+RBS:X_, -3.5
+RBS:september_2001 14.5
+RBT:,_2001 14.5
+R:X:NX_,:to_N1 68
+R:X:NX_,:N1__ 1
+R:X:NX_,:N1_-- -172.5
+R:X:11_._september_2001_NX:september_11_,_2001_N1 -13.5
+R:X:die_NX_und_NX:the_N1_N2 -10
+R:X:NX_,:N1_for -127.5
+R:X:NX_,:N1_in -13.5
+R:X:NX_,:N1_of -55
+R:X:NX_,:N1_on 257.5
+R:X:NX_,:N1_out -58
+RBS:am_11 13.5
+R:X:die_NX_justiz_NX_die:the_N1_judiciary_N2 -57
+R:X:NX_,:N1_refuses_to -232.5
+R:X:die_NX_und_die:the_N1_and 148
+R:X:die_NX_und_die:the_N1_and_the -2.5
+RBT:the_september 13.5
+R:X:die_NX_die_NX:the_N1_N2 -3
+R:X:am_11_._september_NX:the_september_11_,_N1 -14.5
+R:X:die_NX_und_die_NX:the_N1_and_N2 -32
+RBS:zu_ 672
+R:X:NX_,_NX:N1_,_N2 -78
+R:X:NX_,_NX:N1_N2_, 80
+R:X:am_11_._september_2001:the_september_11_,_2001 28
+R:X:zu:for -5
+R:X:zu:in -7
+R:X:zu:to 23
+R:X:taliban:taliban -251.5
+RBS:_taliban -223.5
+RBS:taliban_ -157.5
+R:X:zu:with -6
+R:X:verzweifelten:desperate 28.5
+RBT:_taliban -205.5
+RBT:_desperate 28.5
+RBT:taliban_ -107
+RBT:desperate_ 28.5
+R:X:taliban_NX:taliban_N1 28
+R:X:verzweifelten:verzweifelten -28.5
+RBS:taliban_X 157.5
+R:X:NX_zu:to_N1 -229
+RBT:_verzweifelten -28.5
+R:X:den_taliban:the_taliban 144.5
+RBT:verzweifelten_ -28.5
+RBS:den_taliban 223.5
+RBT:the_taliban 144.5
+R:X:NX_zu:N1_for -152
+R:X:NX_zu:N1_in -6
+R:X:NX_zu:N1_is 251
+R:X:NX_zu:N1_of -49.5
+RBS:_dem 22
+RBT:_its 458
+RBT:its_ 337
+R:X:NX_den_taliban:N1_taliban -50.5
+R:X:NX_den_taliban_NX:N1_taliban_N2 -2.5
+R:X:NX_den_taliban_NX:N2_N1_taliban 132
+R:X:erklärte:declared -8
+RBS:_erklärte -185.5
+RBS:erklärte_ -124.5
+RBT:_declaring -9
+R:X:erklärte:erklärte -116.5
+RBT:_erklärte -116.5
+RBT:erklärte_ -116.5
+R:X:erklärte_NX:declared_N1 -52
+RBS:erklärte_X -61
+RBS:jener_ -62.5
+R:X:erklärte_NX:declaring_N1 -9
+RBS:erklärte_, 185.5
+R:X:NX_jener:N1_of -62.5
+R:X:dem_NX:the_N1 22
+R:X:verkaufen:sell -153
+RBS:_verkaufen -153
+RBS:verkaufen_ -140.5
+RBT:sell_ -153
+RBS:bereit_ 86
+RBS:zu_verkaufen 153
+RBS:_bemühen -2.5
+R:X:bereit:bereit 86
+RBT:_bereit 86
+RBT:bereit_ 86
+R:X:bereit_NX:ready_N1 -31
+RBS:bereit_X -86
+R:X:bereit_NX:N1_ready -55
+RBS:X_zum 30
+R:X:bemühen:bemühen -2.5
+R:X:NX_erklärte_,:N1_, 110
+RBT:_bemühen -2.5
+RBS:X_erklärte 185.5
+RBT:bemühen_ -2.5
+R:X:NX_erklärte_,_NX:N1_,_N2 75.5
+RBS:in_X 22
+RBS:_sich -17.5
+R:X:NX_zu_verkaufen:sell_N1 12.5
+RBS:sich_ -17.5
+R:X:NX_zum_NX:N2_to_further_N1 30
+RBS:_das 45
+RBS:das_ 2.5
+RBT:to_further 30
+RBT:_it -381
+RBT:it_ 3
+RBT:_so 172.5
+RBT:so_ -74.5
+RBT:_this 9.5
+RBT:this_ -11.5
+RBS:X_dem -22
+R:X:das_NX:a_growing_N1 77
+RBS:das_X -2.5
+RBT:a_growing -41
+R:X:das_NX:be_N1 169
+R:X:das_NX:its_N1 -95
+R:X:das_NX:so_N1 -38
+RBS:X_sein 91.5
+R:X:das_NX:the_N1 -80
+done
+
+---
+Best iteration: 2 [SCORE 'stupid_bleu'=0.37119].
+This took 0.6 min.
diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz
new file mode 100644
index 00000000..0741fd92
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.de.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz
new file mode 100644
index 00000000..1c0bd401
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.en.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz
new file mode 100644
index 00000000..7ce81057
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz differ
diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz
new file mode 100644
index 00000000..ce4024a1
Binary files /dev/null and b/training/dtrain/examples/standard/nc-wmt11.grammar.gz differ
diff --git a/training/dtrain/examples/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini
new file mode 100644
index 00000000..b14f4819
--- /dev/null
+++ b/training/dtrain/examples/toy/cdec.ini
@@ -0,0 +1,3 @@
+formalism=scfg
+add_pass_through_rules=true
+grammar=grammar.gz
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
new file mode 100644
index 00000000..cd715f26
--- /dev/null
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -0,0 +1,13 @@
+decoder_config=cdec.ini
+input=src
+refs=tgt
+output=-
+print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
+k=4
+N=4
+epochs=2
+scorer=bleu
+sample_from=kbest
+filter=uniq
+pair_sampling=all
+learning_rate=1
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output
new file mode 100644
index 00000000..1da2aadd
--- /dev/null
+++ b/training/dtrain/examples/toy/expected-output
@@ -0,0 +1,77 @@
+Warning: hi_lo only works with pair_sampling XYX.
+ cdec cfg 'cdec.ini'
+Seeding random number sequence to 1664825829
+
+dtrain
+Parameters:
+ k 4
+ N 4
+ T 2
+ scorer 'bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 1
+ gamma 0
+ loss margin 0
+ pairs 'all'
+ pair threshold 0
+ select weights 'last'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'cdec.ini'
+ input 'src'
+ refs 'tgt'
+ output '-'
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ 2
+WEIGHTS
+ logp = +0
+ shell_rule = -1
+ house_rule = +2
+ small_rule = -2
+ little_rule = +3
+ PassThrough = -5
+ ---
+ 1best avg score: 0.5 (+0.5)
+ 1best avg model score: 2.5 (+2.5)
+ avg # pairs: 4
+ avg # rank err: 1.5
+ avg # margin viol: 0
+ non0 feature count: 6
+ avg list sz: 4
+ avg f count: 2.875
+(time 0 min, 0 s/S)
+
+Iteration #2 of 2.
+ 2
+WEIGHTS
+ logp = +0
+ shell_rule = -1
+ house_rule = +2
+ small_rule = -2
+ little_rule = +3
+ PassThrough = -5
+ ---
+ 1best avg score: 1 (+0.5)
+ 1best avg model score: 5 (+2.5)
+ avg # pairs: 5
+ avg # rank err: 0
+ avg # margin viol: 0
+ non0 feature count: 6
+ avg list sz: 4
+ avg f count: 3
+(time 0 min, 0 s/S)
+
+Writing weights file to '-' ...
+house_rule 2
+little_rule 3
+Glue -4
+PassThrough -5
+small_rule -2
+shell_rule -1
+done
+
+---
+Best iteration: 2 [SCORE 'bleu'=1].
+This took 0 min.
diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz
new file mode 100644
index 00000000..8eb0d29e
Binary files /dev/null and b/training/dtrain/examples/toy/grammar.gz differ
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src
new file mode 100644
index 00000000..87e39ef2
--- /dev/null
+++ b/training/dtrain/examples/toy/src
@@ -0,0 +1,2 @@
+ich sah ein kleines haus
+ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt
new file mode 100644
index 00000000..174926b3
--- /dev/null
+++ b/training/dtrain/examples/toy/tgt
@@ -0,0 +1,2 @@
+i saw a little house
+i found a little house
diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb
index f0cd58c5..86e835e8 100755
--- a/training/dtrain/lplp.rb
+++ b/training/dtrain/lplp.rb
@@ -84,34 +84,28 @@ def _test()
end
#_test()
-# actually do something
+
def usage()
- puts "lplp.rb [n] < "
+ puts "lplp.rb <l0,l1,l2,linfty,mean> <cut|select_k> <k|threshold> <#shards> < <input>"
puts " l0...: norms for selection"
puts "select_k: only output top k (according to the norm of their column vector) features"
puts " cut: output features with weight >= threshold"
puts " n: if we do not have a shard count use this number for averaging"
- exit
+ exit 1
end
-if ARGV.size < 3 then usage end
+if ARGV.size < 4 then usage end
norm_fun = method(ARGV[0].to_sym)
type = ARGV[1]
x = ARGV[2].to_f
-
-shard_count_key = "__SHARD_COUNT__"
+shard_count = ARGV[3].to_f
STDIN.set_encoding 'utf-8'
STDOUT.set_encoding 'utf-8'
w = {}
-shard_count = 0
while line = STDIN.gets
key, val = line.split /\s+/
- if key == shard_count_key
- shard_count += 1
- next
- end
if w.has_key? key
w[key].push val.to_f
else
@@ -119,8 +113,6 @@ while line = STDIN.gets
end
end
-if ARGV.size == 4 then shard_count = ARGV[3].to_f end
-
if type == 'cut'
cut(w, norm_fun, shard_count, x)
elsif type == 'select_k'
diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb
index 24e7f49e..e661416e 100755
--- a/training/dtrain/parallelize.rb
+++ b/training/dtrain/parallelize.rb
@@ -3,16 +3,15 @@
require 'trollop'
def usage
- if ARGV.size != 8
- STDERR.write "Usage: "
- STDERR.write "ruby parallelize.rb -c -e [--randomize/-z] [--reshard/-y] -s <#shards|0> -p -i -r [--qsub/-q] --dtrain_binary -l \"l2 select_k 100000\"\n"
- exit 1
- end
+ STDERR.write "Usage: "
+ STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n"
+ exit 1
end
opts = Trollop::options do
opt :config, "dtrain config file", :type => :string
- opt :epochs, "number of epochs", :type => :int
+ opt :epochs, "number of epochs", :type => :int, :default => 10
+ opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000"
opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false
opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false
opt :shards, "number of shards", :type => :int
@@ -21,8 +20,8 @@ opts = Trollop::options do
opt :references, "references", :type => :string
opt :qsub, "use qsub", :type => :bool, :default => false
opt :dtrain_binary, "path to dtrain binary", :type => :string
- opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000"
end
+usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references]
dtrain_dir = File.expand_path File.dirname(__FILE__)
@@ -32,16 +31,14 @@ else
dtrain_bin = opts[:dtrain_binary]
end
ruby = '/usr/bin/ruby'
-lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb"
+lplp_rb = "#{dtrain_dir}/lplp.rb"
lplp_args = opts[:lplp_args]
cat = '/bin/cat'
ini = opts[:config]
epochs = opts[:epochs]
-rand = false
-rand = true if opts[:randomize]
-reshard = false
-reshard = true if opts[:reshard]
+rand = opts[:randomize]
+reshard = opts[:reshard]
predefined_shards = false
if opts[:shards] == 0
predefined_shards = true
@@ -49,11 +46,10 @@ if opts[:shards] == 0
else
num_shards = opts[:shards]
end
-shards_at_once = opts[:processes_at_once]
input = opts[:input]
refs = opts[:references]
-use_qsub = false
-use_qsub = true if opts[:qsub]
+use_qsub = opts[:qsub]
+shards_at_once = opts[:processes_at_once]
`mkdir work`
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
deleted file mode 100644
index 2df77086..00000000
--- a/training/dtrain/test/example/README
+++ /dev/null
@@ -1,8 +0,0 @@
-Small example of input format for distributed training.
-Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini .
-
-For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
-and recompile.
-
-data can be found here: http://simianer.de/#dtrain
-
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
deleted file mode 100644
index 0215416d..00000000
--- a/training/dtrain/test/example/cdec.ini
+++ /dev/null
@@ -1,25 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-scfg_max_span_limit=15
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-feature_function=WordPenalty
-feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz
-# all currently working feature functions for translation:
-# (with those features active that were used in the ACL paper)
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-feature_function=RuleIdentityFeatures
-feature_function=RuleSourceBigramFeatures
-feature_function=RuleTargetBigramFeatures
-feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
deleted file mode 100644
index 97fce7f0..00000000
--- a/training/dtrain/test/example/dtrain.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-input=./nc-wmt11.1k.gz # use '-' for STDIN
-output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
-select_weights=VOID # don't output weights
-decoder_config=./cdec.ini # config for cdec
-# weights for these features will be printed on each iteration
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-stop_after=10 # stop epoch after 10 inputs
-
-# interesting stuff
-epochs=2 # run over input 2 times
-k=100 # use 100best lists
-N=4 # optimize (approx) BLEU4
-scorer=stupid_bleu # use 'stupid' BLEU+1
-learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron)
-gamma=0 # use SVM reg
-sample_from=kbest # use kbest lists (as opposed to forest)
-filter=uniq # only unique entries in kbest (surface form)
-pair_sampling=XYX
-hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
-pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0)
-loss_margin=0
diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output
deleted file mode 100644
index 05326763..00000000
--- a/training/dtrain/test/example/expected-output
+++ /dev/null
@@ -1,89 +0,0 @@
- cdec cfg 'test/example/cdec.ini'
-Loading the LM will be faster if you build a binary file.
-Reading test/example/nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
- Example feature: Shape_S00000_T00000
-Seeding random number sequence to 2912000813
-
-dtrain
-Parameters:
- k 100
- N 4
- T 2
- scorer 'stupid_bleu'
- sample from 'kbest'
- filter 'uniq'
- learning rate 1
- gamma 0
- loss margin 0
- pairs 'XYX'
- hi lo 0.1
- pair threshold 0
- select weights 'VOID'
- l1 reg 0 'none'
- max pairs 4294967295
- cdec cfg 'test/example/cdec.ini'
- input 'test/example/nc-wmt11.1k.gz'
- output '-'
- stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 2.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
- Glue = -637
- WordPenalty = +1064
- LanguageModel = +1175.3
- LanguageModel_OOV = -1437
- PhraseModel_0 = +1935.6
- PhraseModel_1 = +2499.3
- PhraseModel_2 = +964.96
- PhraseModel_3 = +1410.8
- PhraseModel_4 = -5977.9
- PhraseModel_5 = +522
- PhraseModel_6 = +1089
- PassThrough = -1308
- ---
- 1best avg score: 0.16963 (+0.16963)
- 1best avg model score: 64485 (+64485)
- avg # pairs: 1494.4
- avg # rank err: 702.6
- avg # margin viol: 0
- non0 feature count: 528
- avg list sz: 85.7
- avg f count: 102.75
-(time 0.083 min, 0.5 s/S)
-
-Iteration #2 of 2.
- . 10
-WEIGHTS
- Glue = -1196
- WordPenalty = +809.52
- LanguageModel = +3112.1
- LanguageModel_OOV = -1464
- PhraseModel_0 = +3895.5
- PhraseModel_1 = +4683.4
- PhraseModel_2 = +1092.8
- PhraseModel_3 = +1079.6
- PhraseModel_4 = -6827.7
- PhraseModel_5 = -888
- PhraseModel_6 = +142
- PassThrough = -1335
- ---
- 1best avg score: 0.277 (+0.10736)
- 1best avg model score: -3110.5 (-67595)
- avg # pairs: 1144.2
- avg # rank err: 529.1
- avg # margin viol: 0
- non0 feature count: 859
- avg list sz: 74.9
- avg f count: 112.84
-(time 0.067 min, 0.4 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 2 [SCORE 'stupid_bleu'=0.277].
-This took 0.15 min.
diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README
deleted file mode 100644
index 89715105..00000000
--- a/training/dtrain/test/parallelize/README
+++ /dev/null
@@ -1,5 +0,0 @@
-run for example
- ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs
-
-final weights will be in the file work/weights.3
-
diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini
deleted file mode 100644
index e43ba1c4..00000000
--- a/training/dtrain/test/parallelize/cdec.ini
+++ /dev/null
@@ -1,22 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
-intersection_strategy=cube_pruning
-cubepruning_pop_limit=200
-scfg_max_span_limit=15
-feature_function=WordPenalty
-feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz
-#feature_function=ArityPenalty
-#feature_function=CMR2008ReorderingFeatures
-#feature_function=Dwarf
-#feature_function=InputIndicator
-#feature_function=LexNullJump
-#feature_function=NewJump
-#feature_function=NgramFeatures
-#feature_function=NonLatinCount
-#feature_function=OutputIndicator
-#feature_function=RuleIdentityFeatures
-#feature_function=RuleNgramFeatures
-#feature_function=RuleShape
-#feature_function=SourceSpanSizeFeatures
-#feature_function=SourceWordPenalty
-#feature_function=SpanFeatures
diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini
deleted file mode 100644
index 03f9d240..00000000
--- a/training/dtrain/test/parallelize/dtrain.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-k=100
-N=4
-learning_rate=0.0001
-gamma=0
-loss_margin=0
-epochs=1
-scorer=stupid_bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=XYX
-hi_lo=0.1
-select_weights=last
-print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
-tmp=/tmp
-decoder_config=cdec.ini
diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz
deleted file mode 100644
index 1e28a24b..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.0.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz
deleted file mode 100644
index 372f5675..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.1.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz
deleted file mode 100644
index 145d0dc0..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.2.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz
deleted file mode 100644
index 105593ff..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.3.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz
deleted file mode 100644
index 30781f48..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.4.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz
deleted file mode 100644
index 834ee759..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.5.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz
deleted file mode 100644
index 2e76f348..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.6.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz
deleted file mode 100644
index 3741a887..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.7.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz
deleted file mode 100644
index ebf6bd0c..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.8.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz
deleted file mode 100644
index c1791059..00000000
Binary files a/training/dtrain/test/parallelize/g/grammar.out.9.gz and /dev/null differ
diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in
deleted file mode 100644
index 3b7dec39..00000000
--- a/training/dtrain/test/parallelize/in
+++ /dev/null
@@ -1,10 +0,0 @@
-europas nach rassen geteiltes haus
-ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .
-der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .
-während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .
-eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .
-die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .
-das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .
-die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .
-der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .
-genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .
diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs
deleted file mode 100644
index 632e27b0..00000000
--- a/training/dtrain/test/parallelize/refs
+++ /dev/null
@@ -1,10 +0,0 @@
-europe 's divided racial house
-a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge .
-the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them .
-while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon .
-an aging population at home and ever more open borders imply increasing racial fragmentation in european countries .
-mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear .
-it will not , as america 's racial history clearly shows .
-race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes .
-the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths .
-this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us .
diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini
deleted file mode 100644
index 98b02d44..00000000
--- a/training/dtrain/test/toy/cdec.ini
+++ /dev/null
@@ -1,2 +0,0 @@
-formalism=scfg
-add_pass_through_rules=true
diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini
deleted file mode 100644
index a091732f..00000000
--- a/training/dtrain/test/toy/dtrain.ini
+++ /dev/null
@@ -1,12 +0,0 @@
-decoder_config=test/toy/cdec.ini
-input=test/toy/input
-output=-
-print_weights=logp shell_rule house_rule small_rule little_rule PassThrough
-k=4
-N=4
-epochs=2
-scorer=bleu
-sample_from=kbest
-filter=uniq
-pair_sampling=all
-learning_rate=1
diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input
deleted file mode 100644
index 4d10a9ea..00000000
--- a/training/dtrain/test/toy/input
+++ /dev/null
@@ -1,2 +0,0 @@
-0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0
-1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0
--
cgit v1.2.3