diff options
author | Patrick Simianer <p@simianer.de> | 2013-03-15 11:31:18 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2013-03-15 11:31:18 +0100 |
commit | cf67d34738e1487f75739dc1e027b1864a06513b (patch) | |
tree | 2080f05433457f84280665e8bec27a2ec9dc259a /training | |
parent | 2b4b3adc764085bccc6ddbde96b8cc7ba4287a9f (diff) |
overhauled ruby scripts and examples
Diffstat (limited to 'training')
51 files changed, 1684 insertions, 165 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc index dfb5b351..fcb46db2 100644 --- a/training/dtrain/dtrain.cc +++ b/training/dtrain/dtrain.cc @@ -254,8 +254,6 @@ main(int argc, char** argv) time_t start, end; time(&start); - igzstream grammar_buf_in; - if (t > 0) grammar_buf_in.open(grammar_buf_fn.c_str()); score_t score_sum = 0.; score_t model_sum(0); unsigned ii = 0, rank_errors = 0, margin_violations = 0, npairs = 0, f_count = 0, list_sz = 0; diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/examples/parallelized/README index 89715105..89715105 100644 --- a/training/dtrain/test/parallelize/README +++ b/training/dtrain/examples/parallelized/README diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini index e43ba1c4..e43ba1c4 100644 --- a/training/dtrain/test/parallelize/cdec.ini +++ b/training/dtrain/examples/parallelized/cdec.ini diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini index 03f9d240..f19ef891 100644 --- a/training/dtrain/test/parallelize/dtrain.ini +++ b/training/dtrain/examples/parallelized/dtrain.ini @@ -2,7 +2,7 @@ k=100 N=4 learning_rate=0.0001 gamma=0 -loss_margin=0 +loss_margin=1.0 epochs=1 scorer=stupid_bleu sample_from=kbest @@ -11,5 +11,6 @@ pair_sampling=XYX hi_lo=0.1 select_weights=last print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp +# newer version of the grammar extractor use different feature names: +#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough decoder_config=cdec.ini diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz Binary files differindex 1e28a24b..1e28a24b 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.0.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz Binary files differindex 372f5675..372f5675 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.1.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz Binary files differindex 145d0dc0..145d0dc0 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.2.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz Binary files differindex 105593ff..105593ff 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.3.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz Binary files differindex 30781f48..30781f48 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.4.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz Binary files differindex 834ee759..834ee759 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.5.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz Binary files differindex 2e76f348..2e76f348 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.6.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz Binary files differindex 3741a887..3741a887 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.7.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz Binary files differindex ebf6bd0c..ebf6bd0c 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.8.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz Binary files differindex c1791059..c1791059 100644 --- a/training/dtrain/test/parallelize/g/grammar.out.9.gz +++ b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in new file mode 100644 index 00000000..51d01fe7 --- /dev/null +++ b/training/dtrain/examples/parallelized/in @@ -0,0 +1,10 @@ +<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> +<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> +<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> +<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> +<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> +<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> +<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> +<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/examples/parallelized/refs index 632e27b0..632e27b0 100644 --- a/training/dtrain/test/parallelize/refs +++ b/training/dtrain/examples/parallelized/refs diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0 new file mode 100644 index 00000000..7a00ed0f --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 3121929377 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = +0.2663 + WordPenalty = -0.0079042 + LanguageModel = +0.44782 + LanguageModel_OOV = -0.0401 + PhraseModel_0 = -0.193 + PhraseModel_1 = +0.71321 + PhraseModel_2 = +0.85196 + PhraseModel_3 = -0.43986 + PhraseModel_4 = -0.44803 + PhraseModel_5 = -0.0538 + PhraseModel_6 = -0.1788 + PassThrough = -0.1477 + --- + 1best avg score: 0.17521 (+0.17521) + 1best avg model score: 21.556 (+21.556) + avg # pairs: 1671.2 + avg # rank err: 1118.6 + avg # margin viol: 552.6 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.32 +(time 0.37 min, 4.4 s/S) + +Writing weights file to 'work/weights.0.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.17521]. +This took 0.36667 min. diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1 new file mode 100644 index 00000000..e2bd6649 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 2767202922 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.2699 + WordPenalty = +0.080605 + LanguageModel = -0.026572 + LanguageModel_OOV = -0.30025 + PhraseModel_0 = -0.32076 + PhraseModel_1 = +0.67451 + PhraseModel_2 = +0.92 + PhraseModel_3 = -0.36402 + PhraseModel_4 = -0.592 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.28755 + PassThrough = -0.33285 + --- + 1best avg score: 0.26638 (+0.26638) + 1best avg model score: 53.197 (+53.197) + avg # pairs: 2028.6 + avg # rank err: 998.2 + avg # margin viol: 918.8 + non0 feature count: 12 + avg list sz: 100 + avg f count: 10.496 +(time 0.32 min, 3.8 s/S) + +Writing weights file to 'work/weights.0.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.26638]. +This took 0.31667 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0 new file mode 100644 index 00000000..6e790e38 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1432415010 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3815 + WordPenalty = +0.20064 + LanguageModel = +0.95304 + LanguageModel_OOV = -0.264 + PhraseModel_0 = -0.22362 + PhraseModel_1 = +0.12254 + PhraseModel_2 = +0.26328 + PhraseModel_3 = +0.38018 + PhraseModel_4 = -0.48654 + PhraseModel_5 = +0 + PhraseModel_6 = -0.3645 + PassThrough = -0.2216 + --- + 1best avg score: 0.10863 (+0.10863) + 1best avg model score: -4.9841 (-4.9841) + avg # pairs: 1345.4 + avg # rank err: 822.4 + avg # margin viol: 501 + non0 feature count: 11 + avg list sz: 100 + avg f count: 11.814 +(time 0.45 min, 5.4 s/S) + +Writing weights file to 'work/weights.1.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.10863]. +This took 0.45 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1 new file mode 100644 index 00000000..0b984761 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1771918374 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3178 + WordPenalty = +0.11092 + LanguageModel = +0.17269 + LanguageModel_OOV = -0.13485 + PhraseModel_0 = -0.45371 + PhraseModel_1 = +0.38789 + PhraseModel_2 = +0.75311 + PhraseModel_3 = -0.38163 + PhraseModel_4 = -0.58817 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.27315 + PassThrough = -0.16745 + --- + 1best avg score: 0.13169 (+0.13169) + 1best avg model score: 24.226 (+24.226) + avg # pairs: 1951.2 + avg # rank err: 985.4 + avg # margin viol: 951 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.224 +(time 0.42 min, 5 s/S) + +Writing weights file to 'work/weights.1.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.13169]. +This took 0.41667 min. diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in new file mode 100644 index 00000000..92f9c78e --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.in @@ -0,0 +1,5 @@ +<seg grammar="grammar/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> +<seg grammar="grammar/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> +<seg grammar="grammar/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> +<seg grammar="grammar/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs new file mode 100644 index 00000000..bef68fee --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs @@ -0,0 +1,5 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in new file mode 100644 index 00000000..b7695ce7 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.in @@ -0,0 +1,5 @@ +<seg grammar="grammar/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> +<seg grammar="grammar/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> +<seg grammar="grammar/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> +<seg grammar="grammar/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> +<seg grammar="grammar/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs new file mode 100644 index 00000000..6076f6d5 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs @@ -0,0 +1,5 @@ +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0 new file mode 100644 index 00000000..ddd595a8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0 @@ -0,0 +1,12 @@ +LanguageModel 0.7004298992212881 +PhraseModel_2 0.5576194336478857 +PhraseModel_1 0.41787318415343155 +PhraseModel_4 -0.46728502545635164 +PhraseModel_3 -0.029839521598455515 +Glue -0.05760000000000068 +PhraseModel_6 -0.2716499999999978 +PhraseModel_0 -0.20831031065605327 +LanguageModel_OOV -0.15205000000000077 +PassThrough -0.1846500000000006 +WordPenalty 0.09636994553433414 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0 new file mode 100644 index 00000000..c9370b18 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.0 @@ -0,0 +1,12 @@ +WordPenalty -0.0079041595706392243 +LanguageModel 0.44781580828279532 +LanguageModel_OOV -0.04010000000000042 +Glue 0.26629999999999948 +PhraseModel_0 -0.19299677809125185 +PhraseModel_1 0.71321026861732773 +PhraseModel_2 0.85195540993310537 +PhraseModel_3 -0.43986310822842656 +PhraseModel_4 -0.44802855630415955 +PhraseModel_5 -0.053800000000000514 +PhraseModel_6 -0.17879999999999835 +PassThrough -0.14770000000000036 diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1 new file mode 100644 index 00000000..8fad3de8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.1 @@ -0,0 +1,12 @@ +WordPenalty 0.080605055841244472 +LanguageModel -0.026571720531022844 +LanguageModel_OOV -0.30024999999999141 +Glue -0.26989999999999842 +PhraseModel_2 0.92000295209089566 +PhraseModel_1 0.67450748692470841 +PhraseModel_4 -0.5920000014976784 +PhraseModel_3 -0.36402437203127397 +PhraseModel_6 -0.28754999999999603 +PhraseModel_0 -0.32076244202907672 +PassThrough -0.33284999999999004 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1 new file mode 100644 index 00000000..03058a16 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1 @@ -0,0 +1,12 @@ +PhraseModel_2 0.8365578543552836 +PhraseModel_4 -0.5900840266009169 +PhraseModel_1 0.5312000609786991 +PhraseModel_0 -0.3872342271319619 +PhraseModel_3 -0.3728279676912084 +Glue -0.2938500000000036 +PhraseModel_6 -0.2803499999999967 +PassThrough -0.25014999999999626 +LanguageModel_OOV -0.21754999999999702 +LanguageModel 0.07306061161169894 +WordPenalty 0.09576193325966899 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0 new file mode 100644 index 00000000..6a6a65c1 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1.0 @@ -0,0 +1,11 @@ +WordPenalty 0.20064405063930751 +LanguageModel 0.9530439901597807 +LanguageModel_OOV -0.26400000000000112 +Glue -0.38150000000000084 +PhraseModel_0 -0.22362384322085468 +PhraseModel_1 0.12253609968953538 +PhraseModel_2 0.26328345736266612 +PhraseModel_3 0.38018406503151553 +PhraseModel_4 -0.48654149460854373 +PhraseModel_6 -0.36449999999999722 +PassThrough -0.22160000000000085 diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1 new file mode 100644 index 00000000..f56ea4a2 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1.1 @@ -0,0 +1,12 @@ +WordPenalty 0.1109188106780935 +LanguageModel 0.17269294375442074 +LanguageModel_OOV -0.13485000000000266 +Glue -0.3178000000000088 +PhraseModel_2 0.75311275661967159 +PhraseModel_1 0.38789263503268989 +PhraseModel_4 -0.58816805170415531 +PhraseModel_3 -0.38163156335114284 +PhraseModel_6 -0.27314999999999739 +PhraseModel_0 -0.45370601223484697 +PassThrough -0.16745000000000249 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/standard/README b/training/dtrain/examples/standard/README new file mode 100644 index 00000000..ce37d31a --- /dev/null +++ b/training/dtrain/examples/standard/README @@ -0,0 +1,2 @@ +Call `dtrain` from this folder with ../../dtrain -c dtrain.ini . + diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/examples/standard/cdec.ini index 0215416d..e1edc68d 100644 --- a/training/dtrain/test/example/cdec.ini +++ b/training/dtrain/examples/standard/cdec.ini @@ -3,6 +3,7 @@ add_pass_through_rules=true scfg_max_span_limit=15 intersection_strategy=cube_pruning cubepruning_pop_limit=200 +grammar=nc-wmt11.grammar.gz feature_function=WordPenalty feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz # all currently working feature functions for translation: diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini new file mode 100644 index 00000000..a05e9c29 --- /dev/null +++ b/training/dtrain/examples/standard/dtrain.ini @@ -0,0 +1,24 @@ +input=./nc-wmt11.de.gz +refs=./nc-wmt11.en.gz +output=- # a weights file (add .gz for gzip compression) or STDOUT '-' +select_weights=avg # output average (over epochs) weight vector +decoder_config=./cdec.ini # config for cdec +# weights for these features will be printed on each iteration +print_weights= EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV +# newer version of the grammar extractor use different feature names: +#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +stop_after=10 # stop epoch after 10 inputs + +# interesting stuff +epochs=2 # run over input 2 times +k=100 # use 100best lists +N=4 # optimize (approx) BLEU4 +scorer=stupid_bleu # use 'stupid' BLEU+1 +learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) +gamma=0 # use SVM reg +sample_from=kbest # use kbest lists (as opposed to forest) +filter=uniq # only unique entries in kbest (surface form) +pair_sampling=XYX # +hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here +pair_threshold=0 # minimum distance in BLEU (here: > 0) +loss_margin=0 diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output new file mode 100644 index 00000000..8d72f4c3 --- /dev/null +++ b/training/dtrain/examples/standard/expected-output @@ -0,0 +1,1206 @@ + cdec cfg './cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ./nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** + Example feature: Shape_S00000_T00000 +Seeding random number sequence to 1511823303 + +dtrain +Parameters: + k 100 + N 4 + T 2 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'avg' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg './cdec.ini' + input './nc-wmt11.de.gz' + refs './nc-wmt11.en.gz' + output '-' + stop_after 10 +(a dot represents 10 inputs) +Iteration #1 of 2. + . 10 +Stopping after 10 input sentences. +WEIGHTS + EgivenFCoherent = +0 + SampleCountF = +0 + CountEF = +0 + MaxLexFgivenE = +0 + MaxLexEgivenF = +0 + IsSingletonF = +0 + IsSingletonFE = +0 + Glue = -576 + WordPenalty = +417.79 + PassThrough = -1455 + LanguageModel = +5117.5 + LanguageModel_OOV = -1307 + --- + 1best avg score: 0.27697 (+0.27697) + 1best avg model score: -47918 (-47918) + avg # pairs: 1129.8 + avg # rank err: 581.9 + avg # margin viol: 0 + non0 feature count: 703 + avg list sz: 90.9 + avg f count: 100.09 +(time 0.33 min, 2 s/S) + +Iteration #2 of 2. + . 10 +WEIGHTS + EgivenFCoherent = +0 + SampleCountF = +0 + CountEF = +0 + MaxLexFgivenE = +0 + MaxLexEgivenF = +0 + IsSingletonF = +0 + IsSingletonFE = +0 + Glue = -622 + WordPenalty = +898.56 + PassThrough = -2578 + LanguageModel = +8066.2 + LanguageModel_OOV = -2590 + --- + 1best avg score: 0.37119 (+0.094226) + 1best avg model score: -1.3174e+05 (-83822) + avg # pairs: 1214.9 + avg # rank err: 584.1 + avg # margin viol: 0 + non0 feature count: 1115 + avg list sz: 91.3 + avg f count: 90.755 +(time 0.27 min, 1.6 s/S) + +Writing weights file to '-' ... +R:X:NX_sein:N1_its 61.5 +WordPenalty 658.17328732437022 +LanguageModel 6591.8747593425214 +LanguageModel_OOV -1948.5 +R:X:das_NX:this_N1 12 +R:X:NX_sein_NX:N1_from_ever_being_able_to_N2 30 +R:X:NX_bemühen:N1_effort 2.5 +RBS:X_bemühen 2.5 +R:X:sich:sich -17.5 +RBT:<r>_sich -17.5 +RBT:sich_</r> -17.5 +RBS:sich_X 17.5 +RBS:<r>_als 147 +RBS:als_</r> -59 +Shape_S10000_T10000 -1711.5 +RBT:<r>_when 84 +R:X:zum_NX:as_N1 -134 +RBS:<r>_zum -30 +R:X:als_NX:as_N1 63 +R:X:zum_NX:'s_N1 33 +R:X:zum_NX:the_N1 24 +RBS:X_sich -12 +R:X:zum_NX:to_N1 -36 +R:X:zum_NX:with_the_N1 83 +R:X:NX_zum:N1_the -66 +R:X:NX_zum:N1_to 66 +R:X:als_NX:when_N1 84 +RBS:als_das 59 +RBS:X_das -104 +R:X:NX_das:N1_a 28.5 +R:X:er_sich_NX:he_N1 86.5 +RBS:er_sich 29.5 +R:X:NX_das:N1_it -6 +R:X:er_sich_NX:him_N1 -57 +RBT:<r>_declared -488 +R:X:NX_das:N1_that -5 +RBT:declared_</r> -8 +R:X:NX_das:N1_the -57 +R:X:NX_das:N1_this -17 +R:X:NX_.:N1_. -323 +RBS:X_. 134 +R:X:NX_.:N1_debate_. 6.5 +R:X:NX_.:N1_disruptions_. -14.5 +R:X:NX_.:N1_established_. 7.5 +R:X:NX_.:N1_heading_. 17 +R:X:NX_.:N1_on_. 94 +R:X:NX_.:N1_pace_. 51.5 +R:X:NX_das_NX:N1_a_growing_N2 -45 +R:X:general:general -23.5 +R:X:NX_.:N1_politics_. 84 +R:X:NX_das_NX:N1_a_N2 -0.5 +R:X:NX_.:N1_power_. -99.5 +RBS:general_</r> -23.5 +R:X:NX_.:N1_-_range_missiles_. -28.5 +Shape_S11000_T11000 40 +RBT:general_</r> -23.5 +RBT:<r>_. -645 +R:X:betrat:entered -91 +R:X:NX_.:N1_war_. 68.5 +RBS:<r>_betrat 23.5 +Shape_S11000_T01100 475.5 +RBT:<r>_entered -91 +RBT:entered_</r> -91 +R:X:NX_das_NX:N1_the_N2 -2 +R:X:betrat:betrat 114.5 +RBT:<r>_betrat 114.5 +RBT:betrat_</r> 114.5 +R:X:12:12 79 +R:X:maßnahmen:action 24 +R:X:.:. -566 +RBS:12_</r> 79 +RBS:<r>_maßnahmen -44.5 +RBS:<r>_. -645 +RBT:._</r> -566 +RBT:<r>_action 24 +RBT:12_</r> 79 +RBT:action_</r> 24 +R:X:maßnahmen:actions -13 +RBT:<r>_actions -13 +RBT:actions_</r> -13 +R:X:12_NX:12_N1 -79 +RBT:declared_a -428 +RBS:12_X -79 +RBT:a_state -428 +RBT:state_of -428 +R:X:maßnahmen:maßnahmen -55.5 +R:X:internationale_NX:global_N1 -270 +RBS:X_am 316.5 +RBT:<r>_maßnahmen -55.5 +RBS:am_</r> 267.5 +RBT:maßnahmen_</r> -55.5 +RBS:<r>_den 883 +R:X:internationale_NX:international_N1 270 +RBS:den_X -286.5 +R:X:NX_am:N1_of 267.5 +R:X:NX_als:N1_a -273.5 +RBS:am_X -281 +R:X:den_NX:'s_N1 -31 +R:X:NX_am_NX:N1_of_N2 -30 +R:X:NX_am_NX:N1_on_N2 79 +R:X:NX_als:N1_'s 273.5 +R:X:NX_betrat:N1_entered -23.5 +R:X:ins_NX:into_the_N1 -32.5 +RBS:X_betrat -23.5 +RBT:into_the -55 +R:X:ins_NX:into_N1 32.5 +RBT:<r>_their 303 +R:X:general_NX:general_N1 23.5 +RBS:general_X 23.5 +RBS:<r>_am -316.5 +R:X:den_NX:the_N1 89 +R:X:den_NX_.:the_N1_. 86.5 +R:X:NX_und:and_N1 -216 +RBS:X_und -203.5 +RBS:und_</r> 522.5 +RBT:<r>_and 438.5 +R:X:am_NX:at_N1 23 +R:X:NX_als_das:N1_than_the 59 +R:X:NX_und:N1_- -114 +R:X:NX_und:N1_, 114 +R:X:am_NX:of_N1 -4 +R:X:am_NX:on_N1 -158.5 +R:X:am_NX:the_N1 -190 +RBS:<r>_seine -16.5 +RBS:seine_</r> 39 +R:X:oktober:october -79.5 +R:X:seine:his -5.5 +RBS:<r>_oktober -79.5 +R:X:seine:its 50 +RBT:<r>_october -79.5 +RBT:october_</r> -79.5 +R:X:seine_NX:a_N1 7.5 +RBS:seine_X -39 +R:X:NX_und_NX:and_N1_N2 -22 +RBS:und_X 160.5 +R:X:seine_NX:his_N1 -97 +R:X:seine_NX:its_N1 102.5 +R:X:NX_und_NX:N1_,_and_N2 -4 +R:X:NX_maßnahmen:N1_actions 44.5 +RBS:X_maßnahmen 44.5 +R:X:seine_NX_als:his_N1_than 5.5 +R:X:seine_NX_als:its_N1_as -64.5 +R:X:NX_und_NX:N1_,_N2 -7 +Shape_S01100_T11000 -312.5 +RBS:und_den -822.5 +Shape_S01100_T01100 -537.5 +Shape_S01100_T11100 15 +R:X:NX_seine:'s_N1 -5.5 +RBS:X_seine 16.5 +RBS:X_den -38 +R:X:amerika_NX_sich_NX:america_N1_N2 -12 +R:X:NX_seine_NX:'s_N1_N2 22 +R:X:auf_NX_den_NX:to_N1_the_N2 -23 +R:X:auf_NX_den_NX:to_N1_N2 -23 +RBS:<r>_unterstützen -716 +RBS:unterstützen_</r> -1 +Shape_S11100_T11000 783.5 +Shape_S11100_T01100 -716 +Shape_S11100_T11100 488 +R:X:unterstützen:unterstützen -1 +RBT:<r>_unterstützen -1 +RBT:unterstützen_</r> -1 +R:X:unterstützen_NX:support_N1 -715 +RBS:unterstützen_X -715 +RBT:<r>_will -6 +RBS:X_unterstützen 716 +RBT:<r>_if 35 +R:X:NX_den_NX_.:N1_N2_. 41 +R:X:verfassung:constitution 15 +RBS:<r>_verfassung -43 +RBT:<r>_constitution 15 +RBT:constitution_</r> 15 +R:X:verfassung:constitutional 9.5 +RBT:<r>_constitutional 9.5 +RBS:unterstützen_. 716 +RBT:constitutional_</r> 9.5 +R:X:NX_unterstützen_.:N1_. 716 +R:X:verfassung:verfassung -67.5 +R:X:eine_NX:an_N1 162 +RBT:<r>_verfassung -67.5 +RBT:verfassung_</r> -67.5 +R:X:und:, -21.5 +R:X:,_NX_zu_NX:to_N2_N1 -153 +RBS:<r>_und -389.5 +R:X:und:and -35 +RBS:angeführten_</r> -716 +RBT:and_</r> -35 +RBT:<r>_as 63 +RBS:versucht_</r> 68 +R:X:und:with -3 +R:X:eine_NX:is_N1 -162 +RBS:angeführten_X 716 +R:X:und:und 91 +RBT:<r>_und 91 +RBT:und_</r> 91 +R:X:versucht:tried 68 +RBT:tried_</r> 68 +RBS:versucht_X -68 +R:X:versucht_NX:tried_N1 -68 +R:X:und_NX:and_N1 250 +R:X:und_NX:with_N1 -18 +R:X:und_NX:,_N1 -7 +R:X:und_NX:N1_and -12 +R:X:und_den_NX:and_N1 -716 +R:X:er:he 17 +R:X:NX_eine:N1_is -7 +RBS:<r>_er -47.5 +RBS:er_</r> 54 +RBT:<r>_he 485.5 +RBT:he_</r> 17 +RBT:<r>_him -1 +R:X:und_NX_.:,_N1_. -3 +R:X:er:his 91 +R:X:und_den_NX_.:and_the_N1_. 88 +R:X:NX_eine:N1_will 7 +R:X:er:it 3 +R:X:und_den_NX_.:and_N1_. -216.5 +R:X:er:er -196 +RBT:<r>_er -196 +RBT:er_</r> -196 +RBS:er_X 8 +R:X:er_NX:he_N1 399 +R:X:er_NX:it_N1 -379 +Shape_S01010_T01010 -599 +RBS:pakistanischen_</r> 43 +R:X:NX_versucht:N1_tried 196 +RBT:<r>_pakistan -43 +RBT:<r>_pakistani 2 +R:X:er_NX_,_NX:he_N1_N2 -12 +R:X:NX_hat_er:N1_,_he_has 196 +RBS:hat_er 196 +R:X:NX_er:he_N1 -17 +RBS:X_er -148.5 +RBS:pakistanischen_X -43 +R:X:NX_er:it_N1 -7 +RBS:X_verfassung 43 +R:X:NX_verfassung:N1_'s_constitution 43 +R:X:NX_hat_NX_versucht:N1_N2_has_tried -190 +R:X:NX_hat_NX_versucht:N1_,_N2_has_tried -6 +RBS:der_pakistanischen 43 +RBS:X_pakistanischen -43 +RBS:<r>_aber 46 +RBS:,_als -147 +RBT:<r>_but -321 +R:X:aber_NX:but_N1 46 +R:X:von_NX_angeführten:N1_-_led -716 +R:X:von_NX_angeführten_NX:N1_-_led_N2 716 +RBS:,_aber -114 +RBS:X_aber 68 +R:X:,_als_NX:,_as_N1 -40 +R:X:NX_aber_NX_,:N1_N2_to 68 +R:X:NX_pakistanischen_NX_.:pakistan_N1_N2_. -43 +R:X:NX_,_aber_NX:N1_,_N2 -114 +RBS:<r>_rahmen 43 +RBS:rahmen_</r> 43 +R:X:rahmen:within 20 +R:X:rahmen:rahmen 23 +RBT:<r>_rahmen 23 +RBT:rahmen_</r> 23 +Shape_S01110_T11010 35.5 +R:X:NX_der_pakistanischen:N1_pakistan 43 +Shape_S01110_T01110 -1195 +Shape_S01110_T11110 -6.5 +R:X:NX_,_NX_er:N1_N2_he -33 +RBS:geben_X -577.5 +RBS:<r>_gestalten 196 +Shape_S01110_T01011 278 +RBS:gestalten_</r> 196 +RBS:geben_und 577.5 +R:X:gestalten:more 221 +Shape_S01110_T01111 -181.5 +RBT:<r>_more 221 +RBT:more_</r> 221 +R:X:gestalten:gestalten -25 +RBT:<r>_gestalten -25 +RBT:gestalten_</r> -25 +R:X:effektiver:effectively -151 +RBS:<r>_effektiver 54 +RBS:effektiver_</r> -221 +RBT:<r>_effectively -151 +RBT:effectively_</r> -151 +R:X:effektiver:effektiver -99 +RBT:<r>_effektiver -99 +RBT:effektiver_</r> -99 +Shape_S11110_T11010 -1130 +RBS:zu_geben -107.5 +R:X:effektiver_zu_NX:N1_effectively 304 +RBS:effektiver_zu 221 +RBS:X_geben 107.5 +Shape_S11110_T01110 621 +Shape_S11110_T11110 -75 +RBS:X_gestalten -196 +R:X:NX_gestalten_.:N1_. -196 +RBS:gestalten_. -196 +R:X:terror:terror 672 +RBS:<r>_terror -16 +RBS:terror_</r> 640 +R:X:den:- -4 +RBT:<r>_terror 136 +RBT:terror_</r> 646 +RBS:den_</r> 42.5 +R:X:den:for -11.5 +R:X:terror:terrorism -54 +RBT:<r>_terrorism -54 +Shape_S11110_T11011 -4.5 +RBT:terrorism_</r> -54 +R:X:terror_NX:terror_N1 -634 +R:X:den:of -17 +RBS:terror_X -640 +R:X:den:'s 32.5 +Shape_S11110_T01111 -1.5 +R:X:NX_effektiver:N1_more_effectively 29 +RBS:X_effektiver -54 +R:X:den:the 68 +R:X:NX_geben_und:N1_and 107.5 +R:X:NX_effektiver_zu_NX:N1_N2_effectively -83 +R:X:den:to -33 +RBS:1999_</r> -302.5 +R:X:,_NX_zu_geben_NX:to_N1_N2 -577.5 +R:X:den:with -10 +RBS:X_terror -4.5 +R:X:,_NX_zu_geben_und:to_N1_and 470 +R:X:NX_1999:N1_1999 -302.5 +R:X:NX_1999_NX:N2_N1_1999 302.5 +RBS:1999_X 302.5 +R:X:den_NX_zu:to_N1 783.5 +R:X:NX_rahmen_der:N1_the -43 +RBS:X_rahmen -43 +RBS:rahmen_der -43 +RBS:gegen_</r> 22.5 +R:X:gegen:against -2 +RBT:<r>_against -2 +RBT:against_</r> -2 +R:X:._NX:._N1 -79 +RBS:._X -79.5 +RBS:gegen_den -22.5 +R:X:NX_._oktober:october_N1 79.5 +RBS:._oktober 79.5 +R:X:am_NX_._NX:the_N2_N1 -0.5 +R:X:gegen_den_NX:on_N1 2 +RBS:den_terror 20.5 +RBT:on_terror -26 +R:X:NX_den_terror:the_N1_terror 29 +R:X:den_NX_den_NX:the_N1_N2 -110.5 +R:X:den_NX_den_NX:N2_the_N1 -95 +RBT:<unk>_the -1.5 +R:X:krieg:war -4.5 +RBS:<r>_krieg -22 +R:X:musharraf:musharraf 43 +RBS:krieg_</r> -4.5 +RBT:<r>_war -22 +RBS:<r>_musharraf 66.5 +RBS:musharraf_</r> -23.5 +RBT:war_</r> -4.5 +R:X:musharraf_NX:musharraf_imposed_N1 23.5 +RBS:musharraf_X 23.5 +RBT:musharraf_imposed 23.5 +RBS:krieg_gegen 4.5 +R:X:musharraf_NX:musharraf_N1 107 +R:X:krieg_gegen:war_on 24.5 +RBT:war_on -17.5 +RBS:X_gegen -4.5 +R:X:musharraf_NX_,_als_NX:musharraf_N1_as_N2 -20 +R:X:musharraf_NX_,_als_NX:musharraf_N1_N2 -87 +R:X:krieg_gegen_den_NX:war_on_N1 -16 +R:X:krieg_gegen_den_terror:war_on_terror -26 +R:X:pervez:pervez 22 +RBS:<r>_pervez 22 +RBS:pervez_</r> 57.5 +RBS:X_krieg 22 +RBT:<r>_pervez 22 +RBT:pervez_</r> 22 +RBS:pervez_musharraf -57.5 +RBS:X_musharraf -9 +R:X:NX_musharraf:N1_musharraf -9 +R:X:den_NX_gegen_den:the_N1_on -4.5 +R:X:den_NX_den_terror:the_N1_terror -3 +R:X:NX_krieg_gegen_den_terror:N1_war_on_terror 22 +R:X:den_NX_den_terror_NX:N2_the_N1_terror -1.5 +RBT:<r>_project 91 +RBS:hat_</r> 2 +RBS:X_- 14 +R:X:NX_-:,_N1 48.5 +R:X:NX_-:N1_months_of 32 +R:X:NX_-:N1_relief_and 64 +R:X:NX_-:N1_'s -144.5 +RBS:hat_X -198 +R:X:und_NX_terror_NX:and_N2_N1_terror -4.5 +RBT:and_<unk> -4.5 +R:X:sorgen:bring -19 +RBS:X_pervez -22 +RBT:<r>_bring -19 +RBT:bring_</r> -19 +R:X:sorgen:ensure 19 +RBT:<r>_ensure 19 +RBT:ensure_</r> 19 +R:X:NX_-_NX:N1_N2_security -4 +R:X:NX_projekt_NX:N2_N1_project -156 +R:X:NX_-_NX_.:N1_N2_. 18 +R:X:NX_projekt_NX_.:N2_N1_project_. 156 +RBS:<r>_- -14 +RBT:to_ensure 0.5 +R:X:NX_hat:has_N1 -5 +R:X:NX_hat:N1_, 3 +R:X:NX_hat:,_N1 21.5 +R:X:NX_hat:N1_has -17 +R:X:NX_hat:N1_is -0.5 +R:X:-_NX:of_N1 -26 +R:X:-_NX:'s_N1 -58 +R:X:NX_hat_NX:N1_,_N2 -73 +R:X:NX_hat_NX:N1_N2_has 28 +R:X:-_NX:-_N1 122 +R:X:NX_hat_NX:N1_,_N2_has 21 +R:X:-_NX:--_N1 -21 +R:X:-_NX:,_N1 -31 +R:X:stabilität:stability -118 +RBS:<r>_stabilität -129 +RBT:<r>_stability -118 +RBT:stability_</r> -118 +R:X:stabilität:stabilität -11 +RBT:<r>_stabilität -11 +RBT:stabilität_</r> -11 +RBT:<r>_country 253 +RBS:<r>_für 101 +RBS:für_</r> 129 +RBS:X_ihres -16 +R:X:NX_ihres_NX:N1_of_their_N2 -16 +R:X:für:that 129 +RBT:<r>_political -16 +RBS:für_X -129 +R:X:,_NX_und_NX:,_N1_N2 -2 +R:X:für_NX:to_N1 -28 +R:X:NX_stabilität:N1_stability 129 +RBS:X_stabilität 129 +RBS:X_für 22 +RBT:<unk>_with -109 +RBS:,_für -123 +R:X:,_für_NX:,_N1 15.5 +R:X:,_NX_den_NX_zu:to_N2_N1 69 +R:X:NX_für_NX_.:N1_N2_. 22 +RBS:<r>_ihres 16 +R:X:ihres_NX:its_N1 -50 +R:X:ihres_NX:their_N1 66 +R:X:NX_zu_verkaufen_NX:sell_N1_N2 140.5 +RBS:verkaufen_X 140.5 +RBS:<r>_würde -204 +RBS:würde_</r> -117 +R:X:würde:would -204 +RBS:würde_X 126 +R:X:in_NX_hat_NX:in_N1_N2 22 +R:X:NX_dem_NX_pervez:N1_N2_pervez 35.5 +RBS:<r>_halten 284 +RBS:halten_</r> 204 +R:X:NX_dem_NX_pervez_musharraf:N1_N2_pervez_musharraf -57.5 +Shape_S01111_T01011 560.5 +Shape_S01111_T11011 -20.5 +Shape_S01111_T01111 -5 +RBT:<r>_maintain 30 +R:X:halten:halten 284 +RBT:<r>_halten 284 +RBT:halten_</r> 284 +RBS:halten_X -204 +R:X:NX_würde:if_N1 35 +RBS:X_würde 204 +R:X:NX_würde:will_N1 -6 +Shape_S11111_T11010 69 +R:X:NX_würde:would_face_a_N1 -9.5 +RBT:would_face -18.5 +RBT:face_a -18.5 +Shape_S11111_T11110 -57 +R:X:NX_würde:would_N1 78 +R:X:NX_würde:N1_will -10.5 +R:X:NX_würde_NX:would_N1_N2 126 +R:X:NX_würde_.:would_face_a_N1_. -9 +RBS:würde_. -9 +PhraseModel_0 -2973.8953021225416 +R:X:vielleicht:may -177 +PhraseModel_1 -4012.0052074229625 +PhraseModel_2 -1203.5725821427027 +RBS:vielleicht_</r> -284 +PhraseModel_3 2747.8420998127522 +PhraseModel_4 -3205.3163436680484 +PhraseModel_5 720.5 +PhraseModel_6 275 +R:X:vielleicht:vielleicht -107 +RBT:<r>_vielleicht -107 +RBT:vielleicht_</r> -107 +R:X:vielleicht_NX:perhaps_N1 284 +RBS:vielleicht_X 284 +R:X:NX_halten:maintain_the_N1 -29 +RBS:X_halten -284 +RBT:maintain_the -174 +R:X:NX_halten:N1_hold -51 +R:X:NX_halten_NX:N2_maintain_the_N1 -204 +RBT:<unk>_maintain -204 +RBS:<r>_versprechen 30 +RBS:versprechen_</r> -75 +RBT:<r>_commitment 107 +R:X:versprechen_NX:commitment_N1 30 +RBS:versprechen_X 75 +R:X:NX_versprechen:N1_commitment -75 +RBS:X_versprechen -30 +R:X:NX_,_für_NX:N1_,_N2 -138.5 +R:X:NX_versprechen_NX:N1_commitment_N2 45 +RBS:<r>_dass -451 +RBS:dass_</r> -91.5 +R:X:dass_NX:that_N1 -451 +RBS:dass_X 91.5 +R:X:NX_er_sein:N1_to_make_up_for_his -91.5 +RBS:er_sein -91.5 +R:X:seine_NX_und:a_N1_, -15 +R:X:NX_,_NX_und:N1_N2_, 129 +RBS:,_dass 851.5 +R:X:NX_,_dass:N1_keep -27 +R:X:NX_,_dass:N1_said_that -0.5 +R:X:NX_,_dass:N1_to_let -9.5 +R:X:NX_dass:that_N1 -8.5 +RBS:X_dass -400.5 +R:X:NX_dass:N1_let -51.5 +R:X:NX_dass:N1_see -243.5 +R:X:NX_dass:N1_thought -97 +R:X:NX_,_dass_NX:N1_that_N2 134 +Glue -599 +PassThrough -2016.5 +R:X:musharrafs:his 2 +RBS:musharrafs_</r> -29 +R:X:NX_und_den:N1_and_the 22 +RBT:<r>_his 250.5 +RBT:his_</r> 160.5 +R:X:musharrafs:musharraf -1.5 +RBT:<r>_musharraf 135.5 +RBT:musharraf_</r> 41.5 +R:X:NX_,_dass_NX_.:N1_N2_. 91.5 +R:X:musharrafs:musharrafs -29.5 +RBT:<r>_musharrafs -29.5 +RBT:musharrafs_</r> -29.5 +RBS:sie_X 346 +RBS:<r>_X -1369.5 +R:X:dies:so -74.5 +RBS:X_</r> -1743 +RBS:dies_</r> -348 +R:X:dies:so_,_this 47 +RBT:so_, 47 +R:X:sie_NX:it_N1 22 +RBT:,_this 47 +R:X:dies:that -256.5 +R:X:NX_?:N1_? -134.5 +R:X:dies:these -5.5 +RBS:X_? -235 +RBT:<r>_these -5.5 +RBT:these_</r> -5.5 +R:X:NX_?:N1_consulting_? -100.5 +R:X:dies:this -58.5 +R:X:letzter_NX:last_N1 -14 +RBS:<r>_letzter -20 +RBS:letzter_X 19.5 +RBT:<r>_last -2 +R:X:letzter:last 7 +RBS:letzter_</r> -19.5 +R:X:sein:be 1.5 +RBT:last_</r> 7 +R:X:letzter:late 11.5 +RBT:<r>_they -6 +RBS:sein_</r> 68 +RBT:<r>_late 11.5 +R:X:ist_NX:be_N1 464.5 +RBT:<r>_be -10.5 +RBT:late_</r> 11.5 +R:X:sie_NX:they_N1 -22 +RBS:<r>_ist 415.5 +RBT:be_</r> 120 +R:X:letzter:letzter -24.5 +RBS:ist_X 8 +R:X:sein:being -16 +RBT:<r>_letzter -24.5 +R:X:ist_NX:has_N1 16 +RBT:<r>_being -79 +RBT:letzter_</r> -24.5 +R:X:ist_NX:is_at_N1 6 +RBT:being_</r> -16 +R:X:musharrafs_NX:his_N1 -25 +R:X:sein:his 73 +RBS:musharrafs_X 29 +R:X:ist_NX:is_well_N1 6 +R:X:sein:its -15.5 +R:X:musharrafs_NX:musharraf_'s_N1 77.5 +R:X:sein:sein 55 +RBT:musharraf_'s 55.5 +R:X:ist_NX:is_N1 23 +RBT:<r>_sein 55 +R:X:musharrafs_NX:musharraf_N1 -23.5 +R:X:ist_NX:more_N1 -130.5 +RBT:sein_</r> 55 +R:X:NX_letzter:N1_late -26.5 +R:X:ist_NX:N1_be 176 +R:X:ziel:aim -32.5 +RBS:X_letzter 20 +R:X:ist_NX:N1_has -67 +RBS:<r>_ziel -143 +R:X:NX_letzter:N1_'s_last 13 +R:X:ist_NX:N1_is -19 +RBS:ziel_</r> -219 +R:S:NS_NX:N1_N2 -599 +R:X:ist_NX:N1_,_is 18 +RBT:<r>_aim -32.5 +RBS:<r>_S -599 +R:X:ist_NX:N1_it_is 49 +RBT:aim_</r> -32.5 +RBS:S_X -599 +R:X:ist:are -65.5 +R:X:ziel:goal 45 +R:X:NX_letzter_NX:N1_'s_last_N2 33.5 +RBS:ist_</r> -8 +RBT:<r>_goal 45 +R:X:?:? 235 +RBT:goal_</r> 45 +RBS:<r>_? 235 +R:X:ziel:target -22.5 +RBT:<r>_? 235 +RBS:X__ -347 +RBT:<r>_target -22.5 +RBT:?_</r> 235 +RBT:target_</r> -22.5 +R:X:ist:'s -61 +R:X:ziel:targets -18 +RBS:in_</r> -22 +RBT:<r>_targets -18 +RBT:targets_</r> -18 +RBT:<r>_, 24.5 +R:X:ziel:ziel -125 +RBT:,_</r> -38 +R:X:NX___NX:N1___N2 -347 +R:X:dies_NX:so_N1 200 +RBT:<r>_ziel -125 +RBS:dies_X 256 +RBT:ziel_</r> -125 +RBT:<r>_at 23 +R:X:dies_NX:this_to_N1 156.5 +R:X:ziel_NX:goal_N1 49 +RBT:this_to 156.5 +RBS:ziel_X 219 +R:X:dies_NX:this_N1 -100.5 +R:X:ziel_NX:targets_N1 -19 +R:X:dies_ist:could_be 118.5 +R:X:ziel_NX:target_N1 -20 +RBS:dies_ist 92 +R:X:sein_NX:being_able_to_N1 -71.5 +RBT:in_</r> -65.5 +R:X:in:for 31 +RBT:<r>_could 118.5 +RBS:sein_X -68 +RBT:could_be 118.5 +RBT:being_able -63 +RBT:<r>_for 14.5 +RBT:able_to -63 +RBT:for_</r> 14.5 +R:X:sein_NX:be_N1 -10 +R:X:sein_NX:his_N1 184.5 +RBS:X_ist -507.5 +R:X:sein_NX:its_N1 -26.5 +R:X:in:in -53 +R:X:sein_NX:N1_be -174.5 +R:X:NX_ziel:N1_aim -32.5 +RBT:<r>_in -75.5 +RBS:X_ziel 143 +R:X:NX_ziel:N1_goal 20 +R:X:NX_ziel:N1_target -26.5 +R:X:NX_ziel:N1_targets -27 +RBT:<r>_into -270 +R:X:NX_ziel_NX:N1_goal_N2 60 +R:X:NX_ziel_NX:N1_targets_N2 -6 +R:X:NX_sie_NX_,_dass:N1_N2_that 346 +R:X:NX_ziel_NX:N1_target_N2 -6 +R:X:dies_ist_NX:this_is_N1 -26.5 +R:X:NX_ziel_NX:N2_N1_goal 161 +RBT:<r>_of -38 +RBT:of_</r> -17 +R:X:NX_ist_NX:is_N1_N2 -129 +RBS:<r>_die 428.5 +R:X:NX_ist_NX:is_N1_,_N2 16.5 +RBS:die_</r> -116 +RBT:<r>_on -653.5 +RBT:on_</r> 84.5 +R:X:NX_ist_NX:'s_N1_N2 -41.5 +R:X:die:, -9 +RBT:<r>_over 45 +R:X:die:a -5 +R:X:NX_ist_NX:N1_has_N2 -104.5 +R:X:blieben_NX:remained_N1 135 +R:X:die:an -123 +R:X:NX_ist_NX:N1_is_at_N2 -5.5 +RBS:<r>_blieben 187.5 +R:X:NX_ist_NX:N1_is_well_N2 -5 +RBS:blieben_X -13 +RBT:<r>_are -65.5 +RBT:<r>_'s 16 +R:X:NX_ist_NX:N1_is_N2 -31 +RBT:are_</r> -65.5 +RBT:'s_</r> -28.5 +R:X:blieben_NX:N1_remained 81.5 +R:X:NX_ist_NX:N1_,_is_N2 59.5 +R:X:die:by -10 +R:X:die:its 302.5 +RBS:<r>_pakistanis 57 +RBS:pakistanis_</r> 116.5 +RBT:<r>_to 93.5 +RBT:<r>_pakistanis 161 +R:X:NX_ist_NX:N1_N2_has -75 +R:X:die:the -28 +RBT:to_</r> 18 +R:X:NX_ist_NX:N1_N2_is -97.5 +R:X:pakistanis_NX:pakistanis_N1 57 +R:X:NX_ist_NX:N1_,_N2_is -1 +RBT:<r>_those -6 +RBT:<r>_within 20 +RBT:within_</r> 20 +RBS:pakistanis_X -116.5 +R:X:NX_blieben_NX:N1_,_N2_remained -229.5 +R:X:NX_ist_NX:N2_is_N1 -47 +RBS:X_blieben -187.5 +RBT:<unk>_is -21 +R:X:NX_pakistanis:pakistanis_,_N1 235.5 +RBS:X_pakistanis -57 +RBT:pakistanis_, 104 +R:X:NX_pakistanis:N1_pakistanis -119 +R:X:NX_ist_NX:N2_N1_is -46.5 +RBS:blieben_</r> 13 +RBT:<r>_is -251 +R:X:blieben:blieben -29 +RBT:<r>_blieben -29 +RBT:blieben_</r> -29 +R:X:NX_pakistanis_NX:pakistanis_,_N1_,_N2 -23 +RBS:<r>_zu -560 +R:X:NX_pakistanis_NX:N1_pakistanis_N2 -150.5 +RBS:zu_X -717.5 +R:X:NX_blieben:N1_,_remained 42 +RBS:<r>__ 347 +RBS:<r>_ein 37.5 +RBS:ein_</r> -9.5 +RBS:der_</r> -88.5 +R:X:zu_NX:for_N1 43 +R:X:__NX:__N1 -97 +RBT:<r>_- 113 +RBT:-_</r> -4 +R:X:__NX:,_N1 444 +R:X:zu_NX:in_N1 37.5 +RBT:<r>_a -27.5 +RBT:a_</r> -5 +RBS:sie_</r> -346 +RBT:the_</r> 40 +R:X:zu_NX:to_N1 -716 +R:X:zu_NX:with_N1 40.5 +R:X:zu_NX:N1_on 30 +RBT:<r>_the 324.5 +R:X:NX_sie:but_N1 -346 +RBS:X_ein -37.5 +RBT:be_transformed -12 +R:X:medien:media 299.5 +RBS:<r>_medien -71.5 +RBT:<r>_with 54.5 +RBS:medien_</r> -156 +RBT:with_</r> -19 +RBT:<r>_media 299.5 +R:X:NX_ein:N1_has_an -3.5 +RBT:media_</r> 299.5 +R:X:NX_ein:N1_put_forward_a -6 +R:X:medien:medien -371 +RBT:<r>_medien -371 +RBT:medien_</r> -371 +RBS:der_X 45 +RBS:medien_X 156 +R:X:NX_zu_NX:in_N2_N1 -9.5 +RBS:X_zu 339 +RBT:in_<unk> -2.5 +R:X:NX_zu_NX:of_N2_N1 -52.5 +RBT:to_<unk> -102.5 +RBT:<unk>_to 30 +R:X:,_dass_NX:that_N1 317 +R:X:NX_zu_NX:to_N2_N1 19 +R:X:NX_zu_NX:N1_in_N2 -2 +R:X:NX_zu_NX:N1_is_N2 -2 +RBS:X_macht -0.5 +R:X:NX_zu_NX:N1_to_N2 48 +R:X:NX_macht_NX:N1_N2_does -0.5 +R:X:NX_zu_NX:N2_N1_to -28 +R:X:NX_zu_NX_.:to_N2_N1_. 22.5 +RBS:an_</r> 28 +R:X:NX_zu_NX_.:N1_is_N2_. -3.5 +R:X:NX_zu_NX_.:N1_to_N2_. 7.5 +R:X:NX_zu_NX_.:N1_with_N2_. -3 +R:X:NX_zu_NX_.:N1_N2_. -221.5 +R:X:NX_zu_NX_.:N2_N1_. 4.5 +R:X:freien:free -83.5 +RBS:<r>_freien -118 +RBS:freien_</r> -201.5 +RBT:<r>_free 210 +RBT:free_</r> -83.5 +R:X:freien:freien -276 +RBT:<r>_freien -276 +RBT:freien_</r> -276 +RBT:<r>_an 31.5 +R:X:freien_NX:free_N1 248 +RBT:an_</r> -123 +RBS:freien_X 201.5 +R:X:NX_medien:N1_media -90 +RBS:X_medien 71.5 +R:X:amerika:america 193 +RBS:<r>_amerika -36 +R:X:NX_medien_NX:N2_N1_media 5 +R:X:an_NX:in_N1 210 +R:X:freien_NX_.:free_N1_. -6.5 +RBS:amerika_</r> -131 +R:X:NX_medien_NX_.:N2_N1_media_. 151 +RBT:<r>_america 283.5 +RBT:america_</r> 193 +R:X:die_NX:an_N1 -7.5 +R:X:amerika:american -3 +RBS:die_X -45.5 +RBT:<r>_american -3 +RBT:american_</r> -3 +R:X:amerika:amerika -321 +RBS:<r>_jener 62.5 +R:X:die_NX:a_N1 19 +RBT:<r>_amerika -321 +RBS:jener_X 62.5 +RBT:amerika_</r> -321 +R:X:jener_NX:the_N1 62.5 +R:X:an_NX:to_N1 -210 +RBS:X_jener -62.5 +RBS:amerika_X 131 +R:X:amerika_NX:america_N1 107 +R:X:die_NX:is_N1 -2.5 +RBS:an_der -28 +R:X:auf:, -5 +R:X:die_NX:its_N1 -14 +RBS:auf_</r> 46.5 +R:X:die_NX:'s_N1 46.5 +RBS:X_der 71 +R:X:NX_der:N1_for -74 +R:X:NX_der:N1_in -43 +R:X:auf:in -5.5 +RBT:<r>_choice -103 +R:X:die_NX:the_N1 -86.5 +RBT:<r>_decision 103 +R:X:auf:on 60 +R:X:die_NX:those_N1 -6 +R:X:NX_der:N1_to 72 +R:X:entscheidung_NX:choice_is_N1 -103 +R:X:die_NX:with_N1 73.5 +R:X:auf:auf -3 +RBT:choice_is -103 +RBT:<r>_auf -3 +R:X:entscheidung_NX:decision_N1 103 +R:X:die_NX:,_N1 57 +R:X:die_NX:N1_is -0.5 +RBT:auf_</r> -3 +R:X:die_NX:N1_'s -1 +RBS:auf_X -46.5 +R:X:die_NX:N1_the -1 +R:X:NX_freien:N1_free 158 +RBT:of_<unk> -13 +RBS:X_freien 118 +R:X:NX_der_NX:over_N2_N1 45 +R:X:NX_freien_NX:N1_free_N2 -34 +R:X:NX_freien_NX:N1_free_,_N2 -6 +RBT:over_<unk> 45 +R:X:die_NX_medien:the_N1_media 5.5 +R:X:auf_NX:in_N1 -46.5 +RBT:the_<unk> -0.5 +R:X:auf_NX:on_N1 66 +R:X:auf_NX:to_N1 -2 +R:X:auf_NX:,_N1 -18 +RBS:X_amerika 36 +RBT:<r>_may -177 +RBS:und_die 139.5 +RBT:may_</r> -177 +RBT:<r>_<unk> 585.5 +RBT:<r>_would -18.5 +RBS:X_die -568 +RBT:would_</r> -204 +R:X:NX_die:the_N1 34.5 +R:X:NX_amerika_NX:N2_N1_america 36 +R:X:terroranschläge:terrorist -22 +R:X:NX_die:,_N1 -42 +R:X:NX_die:N1_, -173 +RBS:<r>_terroranschläge -161.5 +RBS:der_macht 0.5 +R:X:NX_die:-_N1 -5 +RBS:terroranschläge_</r> -46 +R:X:NX_die:N1_a -1 +RBT:<r>_terrorist -119.5 +R:X:NX_der_macht_NX:N1_hold_N2_power 28 +RBT:terrorist_</r> -22 +R:X:,:, -2.5 +RBT:terrorist_attacks 77.5 +RBS:<r>_, -182 +RBT:attacks_</r> 28 +RBS:,_</r> -160.5 +R:X:terroranschläge:terroranschläge -52 +RBT:<r>_terroranschläge -52 +RBT:<r>__ -139 +RBT:terroranschläge_</r> -52 +R:X:NX_die:N1_its -128.5 +RBS:terroranschläge_X 46 +RBT:<r>_-- -64 +R:X:terroranschläge_NX:terrorist_attacks_N1 -87.5 +RBT:<r>_by -10 +RBT:by_</r> -10 +R:X:,:out -3.5 +RBT:<r>_out -3.5 +R:X:und_die_NX:and_N1 218 +RBT:out_</r> -3.5 +RBT:<r>_that -261.5 +R:X:NX_die_NX:the_N1_N2 -1 +RBT:that_</r> -127.5 +R:X:NX_die_NX:the_N2_N1 -4 +RBS:,_X -335 +RBT:,_as -40 +R:X:,_NX:in_N1 -239 +R:X:,_NX:of_N1 -4 +R:X:,_NX:on_N1 -166 +R:X:,_NX:to_N1 649 +R:X:NX_die_NX:N1_the_N2 -4 +R:X:,_NX:,_N1 -399 +R:X:,_NX:__N1 -42 +R:X:,_NX:--_N1 -102 +R:X:,_an:to 28 +RBS:,_an 28 +R:X:NX_die_NX:N1_,_N2 -5 +R:X:NX_die_NX:N1_N2_the -4 +RBS:X_an -28 +RBS:die_terroranschläge 161.5 +R:X:die_terroranschläge:,_terrorist_attacks 28 +RBT:,_terrorist 175 +R:X:die_terroranschläge_NX:,_terrorist_attacks_N1 147 +R:X:NX_so:N1_as -1.5 +R:X:justiz:judiciary -90 +RBS:<r>_justiz -1 +RBS:justiz_</r> -220.5 +R:X:NX_so:N1_that -14 +RBT:<r>_judiciary 215 +R:X:NX_so:N1_the 15.5 +RBT:judiciary_</r> -90 +R:X:justiz:justiz -216 +RBT:<r>_justiz -216 +RBT:justiz_</r> -216 +R:X:justiz_NX:judiciary_N1 305 +RBS:justiz_X 205 +RBS:<r>_brachten -28 +RBS:justiz_und 15.5 +RBS:brachten_</r> -175 +R:X:NX_und_die:'s_N1_and -5 +R:X:brachten:brachten -175 +RBT:<r>_brachten -175 +RBT:brachten_</r> -175 +R:X:NX_an_der:N1_the -0.5 +R:X:brachten_NX:N1_brought 147 +RBS:brachten_X 175 +R:X:NX_die_terroranschläge_NX:,_terrorist_attacks_N2_N1 -13.5 +R:X:NX_und_die:N1_'s -12 +R:X:NX_und_die_NX:'s_N2_N1 -16 +RBS:<r>_2001 -14.5 +RBS:2001_</r> 28 +RBT:<r>_2001 37.5 +R:X:NX_und_die_NX:N1_and_N2 -159 +RBT:2001_</r> 28 +R:X:2001_NX:2001_N1 147 +RBS:2001_X -28 +R:X:NX_brachten_NX:N1_N2_brought 28 +RBS:X_brachten 28 +RBT:,_<unk> -109.5 +R:X:2001_NX_die_NX:2001_,_N2_N1 -161.5 +R:X:unabhängige:independent 38 +RBT:2001_, -109.5 +RBS:<r>_unabhängige 127 +RBS:unabhängige_</r> -197 +RBT:<r>_independent 343 +RBT:independent_</r> 38 +RBT:<r>_september -13.5 +R:X:unabhängige:unabhängige -198 +RBT:<r>_unabhängige -198 +RBS:ein_X 9.5 +RBT:unabhängige_</r> -198 +RBS:september_X -14.5 +R:X:unabhängige_NX:independent_N1 287 +R:X:ein_NX:an_N1 132 +R:X:ein_NX:any_N1 25 +RBS:unabhängige_X 197 +R:X:NX_justiz:N1_judiciary 85.5 +R:X:NX_an_der_macht_NX:N1_of_power_N2 -27.5 +RBS:X_justiz 1 +R:X:NX_justiz_NX:N1_judiciary_N2 -43 +R:X:NX_justiz_und:N1_judiciary_and 15.5 +RBS:<r>_11 -13.5 +R:X:NX_unabhängige:N1_independent -37 +R:X:ein_NX:a_N1 -93 +RBS:X_unabhängige -127 +R:X:ein_NX:one_N1 -15 +R:X:NX_unabhängige_NX:N1_independent_N2 -90 +R:X:ein_NX:-_N1 -11.5 +R:X:NX_ein_NX:an_N1_N2 -6 +R:X:NX_ein_NX:be_transformed_N1_N2 -22 +RBS:X_, -3.5 +RBS:september_2001 14.5 +RBT:,_2001 14.5 +R:X:NX_,:to_N1 68 +R:X:NX_,:N1__ 1 +R:X:NX_,:N1_-- -172.5 +R:X:11_._september_2001_NX:september_11_,_2001_N1 -13.5 +R:X:die_NX_und_NX:the_N1_N2 -10 +R:X:NX_,:N1_for -127.5 +R:X:NX_,:N1_in -13.5 +R:X:NX_,:N1_of -55 +R:X:NX_,:N1_on 257.5 +R:X:NX_,:N1_out -58 +RBS:am_11 13.5 +R:X:die_NX_justiz_NX_die:the_N1_judiciary_N2 -57 +R:X:NX_,:N1_refuses_to -232.5 +R:X:die_NX_und_die:the_N1_and 148 +R:X:die_NX_und_die:the_N1_and_the -2.5 +RBT:the_september 13.5 +R:X:die_NX_die_NX:the_N1_N2 -3 +R:X:am_11_._september_NX:the_september_11_,_N1 -14.5 +R:X:die_NX_und_die_NX:the_N1_and_N2 -32 +RBS:zu_</r> 672 +R:X:NX_,_NX:N1_,_N2 -78 +R:X:NX_,_NX:N1_N2_, 80 +R:X:am_11_._september_2001:the_september_11_,_2001 28 +R:X:zu:for -5 +R:X:zu:in -7 +R:X:zu:to 23 +R:X:taliban:taliban -251.5 +RBS:<r>_taliban -223.5 +RBS:taliban_</r> -157.5 +R:X:zu:with -6 +R:X:verzweifelten:desperate 28.5 +RBT:<r>_taliban -205.5 +RBT:<r>_desperate 28.5 +RBT:taliban_</r> -107 +RBT:desperate_</r> 28.5 +R:X:taliban_NX:taliban_N1 28 +R:X:verzweifelten:verzweifelten -28.5 +RBS:taliban_X 157.5 +R:X:NX_zu:to_N1 -229 +RBT:<r>_verzweifelten -28.5 +R:X:den_taliban:the_taliban 144.5 +RBT:verzweifelten_</r> -28.5 +RBS:den_taliban 223.5 +RBT:the_taliban 144.5 +R:X:NX_zu:N1_for -152 +R:X:NX_zu:N1_in -6 +R:X:NX_zu:N1_is 251 +R:X:NX_zu:N1_of -49.5 +RBS:<r>_dem 22 +RBT:<r>_its 458 +RBT:its_</r> 337 +R:X:NX_den_taliban:N1_taliban -50.5 +R:X:NX_den_taliban_NX:N1_taliban_N2 -2.5 +R:X:NX_den_taliban_NX:N2_N1_taliban 132 +R:X:erklärte:declared -8 +RBS:<r>_erklärte -185.5 +RBS:erklärte_</r> -124.5 +RBT:<r>_declaring -9 +R:X:erklärte:erklärte -116.5 +RBT:<r>_erklärte -116.5 +RBT:erklärte_</r> -116.5 +R:X:erklärte_NX:declared_N1 -52 +RBS:erklärte_X -61 +RBS:jener_</r> -62.5 +R:X:erklärte_NX:declaring_N1 -9 +RBS:erklärte_, 185.5 +R:X:NX_jener:N1_of -62.5 +R:X:dem_NX:the_N1 22 +R:X:verkaufen:sell -153 +RBS:<r>_verkaufen -153 +RBS:verkaufen_</r> -140.5 +RBT:sell_</r> -153 +RBS:bereit_</r> 86 +RBS:zu_verkaufen 153 +RBS:<r>_bemühen -2.5 +R:X:bereit:bereit 86 +RBT:<r>_bereit 86 +RBT:bereit_</r> 86 +R:X:bereit_NX:ready_N1 -31 +RBS:bereit_X -86 +R:X:bereit_NX:N1_ready -55 +RBS:X_zum 30 +R:X:bemühen:bemühen -2.5 +R:X:NX_erklärte_,:N1_, 110 +RBT:<r>_bemühen -2.5 +RBS:X_erklärte 185.5 +RBT:bemühen_</r> -2.5 +R:X:NX_erklärte_,_NX:N1_,_N2 75.5 +RBS:in_X 22 +RBS:<r>_sich -17.5 +R:X:NX_zu_verkaufen:sell_N1 12.5 +RBS:sich_</r> -17.5 +R:X:NX_zum_NX:N2_to_further_N1 30 +RBS:<r>_das 45 +RBS:das_</r> 2.5 +RBT:to_further 30 +RBT:<r>_it -381 +RBT:it_</r> 3 +RBT:<r>_so 172.5 +RBT:so_</r> -74.5 +RBT:<r>_this 9.5 +RBT:this_</r> -11.5 +RBS:X_dem -22 +R:X:das_NX:a_growing_N1 77 +RBS:das_X -2.5 +RBT:a_growing -41 +R:X:das_NX:be_N1 169 +R:X:das_NX:its_N1 -95 +R:X:das_NX:so_N1 -38 +RBS:X_sein 91.5 +R:X:das_NX:the_N1 -80 +done + +--- +Best iteration: 2 [SCORE 'stupid_bleu'=0.37119]. +This took 0.6 min. diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz Binary files differnew file mode 100644 index 00000000..0741fd92 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.de.gz diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz Binary files differnew file mode 100644 index 00000000..1c0bd401 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.en.gz diff --git a/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz Binary files differnew file mode 100644 index 00000000..7ce81057 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.en.srilm.gz diff --git a/training/dtrain/examples/standard/nc-wmt11.grammar.gz b/training/dtrain/examples/standard/nc-wmt11.grammar.gz Binary files differnew file mode 100644 index 00000000..ce4024a1 --- /dev/null +++ b/training/dtrain/examples/standard/nc-wmt11.grammar.gz diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/examples/toy/cdec.ini index 98b02d44..b14f4819 100644 --- a/training/dtrain/test/toy/cdec.ini +++ b/training/dtrain/examples/toy/cdec.ini @@ -1,2 +1,3 @@ formalism=scfg add_pass_through_rules=true +grammar=grammar.gz diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini index a091732f..cd715f26 100644 --- a/training/dtrain/test/toy/dtrain.ini +++ b/training/dtrain/examples/toy/dtrain.ini @@ -1,5 +1,6 @@ -decoder_config=test/toy/cdec.ini -input=test/toy/input +decoder_config=cdec.ini +input=src +refs=tgt output=- print_weights=logp shell_rule house_rule small_rule little_rule PassThrough k=4 diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output new file mode 100644 index 00000000..1da2aadd --- /dev/null +++ b/training/dtrain/examples/toy/expected-output @@ -0,0 +1,77 @@ +Warning: hi_lo only works with pair_sampling XYX. + cdec cfg 'cdec.ini' +Seeding random number sequence to 1664825829 + +dtrain +Parameters: + k 4 + N 4 + T 2 + scorer 'bleu' + sample from 'kbest' + filter 'uniq' + learning rate 1 + gamma 0 + loss margin 0 + pairs 'all' + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'src' + refs 'tgt' + output '-' +(a dot represents 10 inputs) +Iteration #1 of 2. + 2 +WEIGHTS + logp = +0 + shell_rule = -1 + house_rule = +2 + small_rule = -2 + little_rule = +3 + PassThrough = -5 + --- + 1best avg score: 0.5 (+0.5) + 1best avg model score: 2.5 (+2.5) + avg # pairs: 4 + avg # rank err: 1.5 + avg # margin viol: 0 + non0 feature count: 6 + avg list sz: 4 + avg f count: 2.875 +(time 0 min, 0 s/S) + +Iteration #2 of 2. + 2 +WEIGHTS + logp = +0 + shell_rule = -1 + house_rule = +2 + small_rule = -2 + little_rule = +3 + PassThrough = -5 + --- + 1best avg score: 1 (+0.5) + 1best avg model score: 5 (+2.5) + avg # pairs: 5 + avg # rank err: 0 + avg # margin viol: 0 + non0 feature count: 6 + avg list sz: 4 + avg f count: 3 +(time 0 min, 0 s/S) + +Writing weights file to '-' ... +house_rule 2 +little_rule 3 +Glue -4 +PassThrough -5 +small_rule -2 +shell_rule -1 +done + +--- +Best iteration: 2 [SCORE 'bleu'=1]. +This took 0 min. diff --git a/training/dtrain/examples/toy/grammar.gz b/training/dtrain/examples/toy/grammar.gz Binary files differnew file mode 100644 index 00000000..8eb0d29e --- /dev/null +++ b/training/dtrain/examples/toy/grammar.gz diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src new file mode 100644 index 00000000..87e39ef2 --- /dev/null +++ b/training/dtrain/examples/toy/src @@ -0,0 +1,2 @@ +ich sah ein kleines haus +ich fand ein kleines haus diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt new file mode 100644 index 00000000..174926b3 --- /dev/null +++ b/training/dtrain/examples/toy/tgt @@ -0,0 +1,2 @@ +i saw a little house +i found a little house diff --git a/training/dtrain/lplp.rb b/training/dtrain/lplp.rb index f0cd58c5..86e835e8 100755 --- a/training/dtrain/lplp.rb +++ b/training/dtrain/lplp.rb @@ -84,34 +84,28 @@ def _test() end #_test() -# actually do something + def usage() - puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> [n] < <input>" + puts "lplp.rb <l0,l1,l2,linfty,mean,median> <cut|select_k> <k|threshold> <#shards> < <input>" puts " l0...: norms for selection" puts "select_k: only output top k (according to the norm of their column vector) features" puts " cut: output features with weight >= threshold" puts " n: if we do not have a shard count use this number for averaging" - exit + exit 1 end -if ARGV.size < 3 then usage end +if ARGV.size < 4 then usage end norm_fun = method(ARGV[0].to_sym) type = ARGV[1] x = ARGV[2].to_f - -shard_count_key = "__SHARD_COUNT__" +shard_count = ARGV[3].to_f STDIN.set_encoding 'utf-8' STDOUT.set_encoding 'utf-8' w = {} -shard_count = 0 while line = STDIN.gets key, val = line.split /\s+/ - if key == shard_count_key - shard_count += 1 - next - end if w.has_key? key w[key].push val.to_f else @@ -119,8 +113,6 @@ while line = STDIN.gets end end -if ARGV.size == 4 then shard_count = ARGV[3].to_f end - if type == 'cut' cut(w, norm_fun, shard_count, x) elsif type == 'select_k' diff --git a/training/dtrain/parallelize.rb b/training/dtrain/parallelize.rb index 24e7f49e..e661416e 100755 --- a/training/dtrain/parallelize.rb +++ b/training/dtrain/parallelize.rb @@ -3,16 +3,15 @@ require 'trollop' def usage - if ARGV.size != 8 - STDERR.write "Usage: " - STDERR.write "ruby parallelize.rb -c <dtrain.ini> -e <epochs> [--randomize/-z] [--reshard/-y] -s <#shards|0> -p <at once> -i <input> -r <refs> [--qsub/-q] --dtrain_binary <path to dtrain binary> -l \"l2 select_k 100000\"\n" - exit 1 - end + STDERR.write "Usage: " + STDERR.write "ruby parallelize.rb -c <dtrain.ini> [-e <epochs=10>] [--randomize/-z] [--reshard/-y] -s <#shards|0> [-p <at once=9999>] -i <input> -r <refs> [--qsub/-q] [--dtrain_binary <path to dtrain binary>] [-l \"l2 select_k 100000\"]\n" + exit 1 end opts = Trollop::options do opt :config, "dtrain config file", :type => :string - opt :epochs, "number of epochs", :type => :int + opt :epochs, "number of epochs", :type => :int, :default => 10 + opt :lplp_args, "arguments for lplp.rb", :type => :string, :default => "l2 select_k 100000" opt :randomize, "randomize shards before each epoch", :type => :bool, :short => '-z', :default => false opt :reshard, "reshard after each epoch", :type => :bool, :short => '-y', :default => false opt :shards, "number of shards", :type => :int @@ -21,8 +20,8 @@ opts = Trollop::options do opt :references, "references", :type => :string opt :qsub, "use qsub", :type => :bool, :default => false opt :dtrain_binary, "path to dtrain binary", :type => :string - opt :lplp_args, "arguments for lplp arguments", :type => :string, :default => "l2 select_k 100000" end +usage if not opts[:config]&&opts[:shards]&&opts[:input]&&opts[:references] dtrain_dir = File.expand_path File.dirname(__FILE__) @@ -32,16 +31,14 @@ else dtrain_bin = opts[:dtrain_binary] end ruby = '/usr/bin/ruby' -lplp_rb = "#{dtrain_dir}/hstreaming/lplp.rb" +lplp_rb = "#{dtrain_dir}/lplp.rb" lplp_args = opts[:lplp_args] cat = '/bin/cat' ini = opts[:config] epochs = opts[:epochs] -rand = false -rand = true if opts[:randomize] -reshard = false -reshard = true if opts[:reshard] +rand = opts[:randomize] +reshard = opts[:reshard] predefined_shards = false if opts[:shards] == 0 predefined_shards = true @@ -49,11 +46,10 @@ if opts[:shards] == 0 else num_shards = opts[:shards] end -shards_at_once = opts[:processes_at_once] input = opts[:input] refs = opts[:references] -use_qsub = false -use_qsub = true if opts[:qsub] +use_qsub = opts[:qsub] +shards_at_once = opts[:processes_at_once] `mkdir work` diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README deleted file mode 100644 index 2df77086..00000000 --- a/training/dtrain/test/example/README +++ /dev/null @@ -1,8 +0,0 @@ -Small example of input format for distributed training. -Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini . - -For this to work, undef 'DTRAIN_LOCAL' in dtrain.h -and recompile. - -data can be found here: http://simianer.de/#dtrain - diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini deleted file mode 100644 index 97fce7f0..00000000 --- a/training/dtrain/test/example/dtrain.ini +++ /dev/null @@ -1,22 +0,0 @@ -input=./nc-wmt11.1k.gz # use '-' for STDIN -output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights -decoder_config=./cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=2 # run over input 2 times -k=100 # use 100best lists -N=4 # optimize (approx) BLEU4 -scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) -gamma=0 # use SVM reg -sample_from=kbest # use kbest lists (as opposed to forest) -filter=uniq # only unique entries in kbest (surface form) -pair_sampling=XYX -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0) -loss_margin=0 diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output deleted file mode 100644 index 05326763..00000000 --- a/training/dtrain/test/example/expected-output +++ /dev/null @@ -1,89 +0,0 @@ - cdec cfg 'test/example/cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading test/example/nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** - Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2912000813 - -dtrain -Parameters: - k 100 - N 4 - T 2 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 1 - gamma 0 - loss margin 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'VOID' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'test/example/cdec.ini' - input 'test/example/nc-wmt11.1k.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 2. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = -637 - WordPenalty = +1064 - LanguageModel = +1175.3 - LanguageModel_OOV = -1437 - PhraseModel_0 = +1935.6 - PhraseModel_1 = +2499.3 - PhraseModel_2 = +964.96 - PhraseModel_3 = +1410.8 - PhraseModel_4 = -5977.9 - PhraseModel_5 = +522 - PhraseModel_6 = +1089 - PassThrough = -1308 - --- - 1best avg score: 0.16963 (+0.16963) - 1best avg model score: 64485 (+64485) - avg # pairs: 1494.4 - avg # rank err: 702.6 - avg # margin viol: 0 - non0 feature count: 528 - avg list sz: 85.7 - avg f count: 102.75 -(time 0.083 min, 0.5 s/S) - -Iteration #2 of 2. - . 10 -WEIGHTS - Glue = -1196 - WordPenalty = +809.52 - LanguageModel = +3112.1 - LanguageModel_OOV = -1464 - PhraseModel_0 = +3895.5 - PhraseModel_1 = +4683.4 - PhraseModel_2 = +1092.8 - PhraseModel_3 = +1079.6 - PhraseModel_4 = -6827.7 - PhraseModel_5 = -888 - PhraseModel_6 = +142 - PassThrough = -1335 - --- - 1best avg score: 0.277 (+0.10736) - 1best avg model score: -3110.5 (-67595) - avg # pairs: 1144.2 - avg # rank err: 529.1 - avg # margin viol: 0 - non0 feature count: 859 - avg list sz: 74.9 - avg f count: 112.84 -(time 0.067 min, 0.4 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. -This took 0.15 min. diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in deleted file mode 100644 index 3b7dec39..00000000 --- a/training/dtrain/test/parallelize/in +++ /dev/null @@ -1,10 +0,0 @@ -<seg grammar="g/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> -<seg grammar="g/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> -<seg grammar="g/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> -<seg grammar="g/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> -<seg grammar="g/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> -<seg grammar="g/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> -<seg grammar="g/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> -<seg grammar="g/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> -<seg grammar="g/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> -<seg grammar="g/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input deleted file mode 100644 index 4d10a9ea..00000000 --- a/training/dtrain/test/toy/input +++ /dev/null @@ -1,2 +0,0 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 |