From 2a48d73eb794fdd736d1df035c8a31af887cde0a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Fri, 15 Mar 2013 11:31:18 +0100 Subject: overhauled ruby scripts and examples --- training/dtrain/examples/parallelized/README | 5 ++ training/dtrain/examples/parallelized/cdec.ini | 22 ++++++++ training/dtrain/examples/parallelized/dtrain.ini | 16 ++++++ .../examples/parallelized/grammar/grammar.out.0.gz | Bin 0 -> 8318 bytes .../examples/parallelized/grammar/grammar.out.1.gz | Bin 0 -> 358560 bytes .../examples/parallelized/grammar/grammar.out.2.gz | Bin 0 -> 1014466 bytes .../examples/parallelized/grammar/grammar.out.3.gz | Bin 0 -> 391811 bytes .../examples/parallelized/grammar/grammar.out.4.gz | Bin 0 -> 149590 bytes .../examples/parallelized/grammar/grammar.out.5.gz | Bin 0 -> 537024 bytes .../examples/parallelized/grammar/grammar.out.6.gz | Bin 0 -> 291286 bytes .../examples/parallelized/grammar/grammar.out.7.gz | Bin 0 -> 1038140 bytes .../examples/parallelized/grammar/grammar.out.8.gz | Bin 0 -> 419889 bytes .../examples/parallelized/grammar/grammar.out.9.gz | Bin 0 -> 409140 bytes training/dtrain/examples/parallelized/in | 10 ++++ training/dtrain/examples/parallelized/refs | 10 ++++ training/dtrain/examples/parallelized/work/out.0.0 | 61 ++++++++++++++++++++ training/dtrain/examples/parallelized/work/out.0.1 | 62 +++++++++++++++++++++ training/dtrain/examples/parallelized/work/out.1.0 | 61 ++++++++++++++++++++ training/dtrain/examples/parallelized/work/out.1.1 | 62 +++++++++++++++++++++ .../dtrain/examples/parallelized/work/shard.0.0.in | 5 ++ .../examples/parallelized/work/shard.0.0.refs | 5 ++ .../dtrain/examples/parallelized/work/shard.1.0.in | 5 ++ .../examples/parallelized/work/shard.1.0.refs | 5 ++ .../dtrain/examples/parallelized/work/weights.0 | 12 ++++ .../dtrain/examples/parallelized/work/weights.0.0 | 12 ++++ .../dtrain/examples/parallelized/work/weights.0.1 | 12 ++++ .../dtrain/examples/parallelized/work/weights.1 | 12 ++++ .../dtrain/examples/parallelized/work/weights.1.0 | 11 ++++ .../dtrain/examples/parallelized/work/weights.1.1 | 12 ++++ 29 files changed, 400 insertions(+) create mode 100644 training/dtrain/examples/parallelized/README create mode 100644 training/dtrain/examples/parallelized/cdec.ini create mode 100644 training/dtrain/examples/parallelized/dtrain.ini create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.0.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.1.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.2.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.3.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.4.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.5.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.6.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.7.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.8.gz create mode 100644 training/dtrain/examples/parallelized/grammar/grammar.out.9.gz create mode 100644 training/dtrain/examples/parallelized/in create mode 100644 training/dtrain/examples/parallelized/refs create mode 100644 training/dtrain/examples/parallelized/work/out.0.0 create mode 100644 training/dtrain/examples/parallelized/work/out.0.1 create mode 100644 training/dtrain/examples/parallelized/work/out.1.0 create mode 100644 training/dtrain/examples/parallelized/work/out.1.1 create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.in create mode 100644 training/dtrain/examples/parallelized/work/shard.0.0.refs create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.in create mode 100644 training/dtrain/examples/parallelized/work/shard.1.0.refs create mode 100644 training/dtrain/examples/parallelized/work/weights.0 create mode 100644 training/dtrain/examples/parallelized/work/weights.0.0 create mode 100644 training/dtrain/examples/parallelized/work/weights.0.1 create mode 100644 training/dtrain/examples/parallelized/work/weights.1 create mode 100644 training/dtrain/examples/parallelized/work/weights.1.0 create mode 100644 training/dtrain/examples/parallelized/work/weights.1.1 (limited to 'training/dtrain/examples/parallelized') diff --git a/training/dtrain/examples/parallelized/README b/training/dtrain/examples/parallelized/README new file mode 100644 index 00000000..89715105 --- /dev/null +++ b/training/dtrain/examples/parallelized/README @@ -0,0 +1,5 @@ +run for example + ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs + +final weights will be in the file work/weights.3 + diff --git a/training/dtrain/examples/parallelized/cdec.ini b/training/dtrain/examples/parallelized/cdec.ini new file mode 100644 index 00000000..e43ba1c4 --- /dev/null +++ b/training/dtrain/examples/parallelized/cdec.ini @@ -0,0 +1,22 @@ +formalism=scfg +add_pass_through_rules=true +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +#feature_function=RuleIdentityFeatures +#feature_function=RuleNgramFeatures +#feature_function=RuleShape +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures diff --git a/training/dtrain/examples/parallelized/dtrain.ini b/training/dtrain/examples/parallelized/dtrain.ini new file mode 100644 index 00000000..f19ef891 --- /dev/null +++ b/training/dtrain/examples/parallelized/dtrain.ini @@ -0,0 +1,16 @@ +k=100 +N=4 +learning_rate=0.0001 +gamma=0 +loss_margin=1.0 +epochs=1 +scorer=stupid_bleu +sample_from=kbest +filter=uniq +pair_sampling=XYX +hi_lo=0.1 +select_weights=last +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +# newer version of the grammar extractor use different feature names: +#print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough +decoder_config=cdec.ini diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz new file mode 100644 index 00000000..1e28a24b Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.0.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz new file mode 100644 index 00000000..372f5675 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.1.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz new file mode 100644 index 00000000..145d0dc0 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.2.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz new file mode 100644 index 00000000..105593ff Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.3.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz new file mode 100644 index 00000000..30781f48 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.4.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz new file mode 100644 index 00000000..834ee759 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.5.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz new file mode 100644 index 00000000..2e76f348 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.6.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz new file mode 100644 index 00000000..3741a887 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.7.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz new file mode 100644 index 00000000..ebf6bd0c Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.8.gz differ diff --git a/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz new file mode 100644 index 00000000..c1791059 Binary files /dev/null and b/training/dtrain/examples/parallelized/grammar/grammar.out.9.gz differ diff --git a/training/dtrain/examples/parallelized/in b/training/dtrain/examples/parallelized/in new file mode 100644 index 00000000..51d01fe7 --- /dev/null +++ b/training/dtrain/examples/parallelized/in @@ -0,0 +1,10 @@ +europas nach rassen geteiltes haus +ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . +der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . +während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . +eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . +die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . +das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . +die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . +der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . +genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/training/dtrain/examples/parallelized/refs b/training/dtrain/examples/parallelized/refs new file mode 100644 index 00000000..632e27b0 --- /dev/null +++ b/training/dtrain/examples/parallelized/refs @@ -0,0 +1,10 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/out.0.0 b/training/dtrain/examples/parallelized/work/out.0.0 new file mode 100644 index 00000000..7a00ed0f --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 3121929377 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = +0.2663 + WordPenalty = -0.0079042 + LanguageModel = +0.44782 + LanguageModel_OOV = -0.0401 + PhraseModel_0 = -0.193 + PhraseModel_1 = +0.71321 + PhraseModel_2 = +0.85196 + PhraseModel_3 = -0.43986 + PhraseModel_4 = -0.44803 + PhraseModel_5 = -0.0538 + PhraseModel_6 = -0.1788 + PassThrough = -0.1477 + --- + 1best avg score: 0.17521 (+0.17521) + 1best avg model score: 21.556 (+21.556) + avg # pairs: 1671.2 + avg # rank err: 1118.6 + avg # margin viol: 552.6 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.32 +(time 0.37 min, 4.4 s/S) + +Writing weights file to 'work/weights.0.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.17521]. +This took 0.36667 min. diff --git a/training/dtrain/examples/parallelized/work/out.0.1 b/training/dtrain/examples/parallelized/work/out.0.1 new file mode 100644 index 00000000..e2bd6649 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.0.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 2767202922 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.0.0.in' + refs 'work/shard.0.0.refs' + output 'work/weights.0.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.2699 + WordPenalty = +0.080605 + LanguageModel = -0.026572 + LanguageModel_OOV = -0.30025 + PhraseModel_0 = -0.32076 + PhraseModel_1 = +0.67451 + PhraseModel_2 = +0.92 + PhraseModel_3 = -0.36402 + PhraseModel_4 = -0.592 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.28755 + PassThrough = -0.33285 + --- + 1best avg score: 0.26638 (+0.26638) + 1best avg model score: 53.197 (+53.197) + avg # pairs: 2028.6 + avg # rank err: 998.2 + avg # margin viol: 918.8 + non0 feature count: 12 + avg list sz: 100 + avg f count: 10.496 +(time 0.32 min, 3.8 s/S) + +Writing weights file to 'work/weights.0.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.26638]. +This took 0.31667 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.0 b/training/dtrain/examples/parallelized/work/out.1.0 new file mode 100644 index 00000000..6e790e38 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.0 @@ -0,0 +1,61 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1432415010 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3815 + WordPenalty = +0.20064 + LanguageModel = +0.95304 + LanguageModel_OOV = -0.264 + PhraseModel_0 = -0.22362 + PhraseModel_1 = +0.12254 + PhraseModel_2 = +0.26328 + PhraseModel_3 = +0.38018 + PhraseModel_4 = -0.48654 + PhraseModel_5 = +0 + PhraseModel_6 = -0.3645 + PassThrough = -0.2216 + --- + 1best avg score: 0.10863 (+0.10863) + 1best avg model score: -4.9841 (-4.9841) + avg # pairs: 1345.4 + avg # rank err: 822.4 + avg # margin viol: 501 + non0 feature count: 11 + avg list sz: 100 + avg f count: 11.814 +(time 0.45 min, 5.4 s/S) + +Writing weights file to 'work/weights.1.0' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.10863]. +This took 0.45 min. diff --git a/training/dtrain/examples/parallelized/work/out.1.1 b/training/dtrain/examples/parallelized/work/out.1.1 new file mode 100644 index 00000000..0b984761 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/out.1.1 @@ -0,0 +1,62 @@ + cdec cfg 'cdec.ini' +Loading the LM will be faster if you build a binary file. +Reading ../example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Seeding random number sequence to 1771918374 + +dtrain +Parameters: + k 100 + N 4 + T 1 + scorer 'stupid_bleu' + sample from 'kbest' + filter 'uniq' + learning rate 0.0001 + gamma 0 + loss margin 1 + pairs 'XYX' + hi lo 0.1 + pair threshold 0 + select weights 'last' + l1 reg 0 'none' + max pairs 4294967295 + cdec cfg 'cdec.ini' + input 'work/shard.1.0.in' + refs 'work/shard.1.0.refs' + output 'work/weights.1.1' + weights in 'work/weights.0' +(a dot represents 10 inputs) +Iteration #1 of 1. + 5 +WEIGHTS + Glue = -0.3178 + WordPenalty = +0.11092 + LanguageModel = +0.17269 + LanguageModel_OOV = -0.13485 + PhraseModel_0 = -0.45371 + PhraseModel_1 = +0.38789 + PhraseModel_2 = +0.75311 + PhraseModel_3 = -0.38163 + PhraseModel_4 = -0.58817 + PhraseModel_5 = -0.0269 + PhraseModel_6 = -0.27315 + PassThrough = -0.16745 + --- + 1best avg score: 0.13169 (+0.13169) + 1best avg model score: 24.226 (+24.226) + avg # pairs: 1951.2 + avg # rank err: 985.4 + avg # margin viol: 951 + non0 feature count: 12 + avg list sz: 100 + avg f count: 11.224 +(time 0.42 min, 5 s/S) + +Writing weights file to 'work/weights.1.1' ... +done + +--- +Best iteration: 1 [SCORE 'stupid_bleu'=0.13169]. +This took 0.41667 min. diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.in b/training/dtrain/examples/parallelized/work/shard.0.0.in new file mode 100644 index 00000000..92f9c78e --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.in @@ -0,0 +1,5 @@ +europas nach rassen geteiltes haus +ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen . +der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln . +während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden . +eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern . diff --git a/training/dtrain/examples/parallelized/work/shard.0.0.refs b/training/dtrain/examples/parallelized/work/shard.0.0.refs new file mode 100644 index 00000000..bef68fee --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.0.0.refs @@ -0,0 +1,5 @@ +europe 's divided racial house +a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . +the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . +while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . +an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.in b/training/dtrain/examples/parallelized/work/shard.1.0.in new file mode 100644 index 00000000..b7695ce7 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.in @@ -0,0 +1,5 @@ +die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden . +das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt . +die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen . +der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken . +genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen . diff --git a/training/dtrain/examples/parallelized/work/shard.1.0.refs b/training/dtrain/examples/parallelized/work/shard.1.0.refs new file mode 100644 index 00000000..6076f6d5 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/shard.1.0.refs @@ -0,0 +1,5 @@ +mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . +it will not , as america 's racial history clearly shows . +race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . +the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . +this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/examples/parallelized/work/weights.0 b/training/dtrain/examples/parallelized/work/weights.0 new file mode 100644 index 00000000..ddd595a8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0 @@ -0,0 +1,12 @@ +LanguageModel 0.7004298992212881 +PhraseModel_2 0.5576194336478857 +PhraseModel_1 0.41787318415343155 +PhraseModel_4 -0.46728502545635164 +PhraseModel_3 -0.029839521598455515 +Glue -0.05760000000000068 +PhraseModel_6 -0.2716499999999978 +PhraseModel_0 -0.20831031065605327 +LanguageModel_OOV -0.15205000000000077 +PassThrough -0.1846500000000006 +WordPenalty 0.09636994553433414 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.0.0 b/training/dtrain/examples/parallelized/work/weights.0.0 new file mode 100644 index 00000000..c9370b18 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.0 @@ -0,0 +1,12 @@ +WordPenalty -0.0079041595706392243 +LanguageModel 0.44781580828279532 +LanguageModel_OOV -0.04010000000000042 +Glue 0.26629999999999948 +PhraseModel_0 -0.19299677809125185 +PhraseModel_1 0.71321026861732773 +PhraseModel_2 0.85195540993310537 +PhraseModel_3 -0.43986310822842656 +PhraseModel_4 -0.44802855630415955 +PhraseModel_5 -0.053800000000000514 +PhraseModel_6 -0.17879999999999835 +PassThrough -0.14770000000000036 diff --git a/training/dtrain/examples/parallelized/work/weights.0.1 b/training/dtrain/examples/parallelized/work/weights.0.1 new file mode 100644 index 00000000..8fad3de8 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.0.1 @@ -0,0 +1,12 @@ +WordPenalty 0.080605055841244472 +LanguageModel -0.026571720531022844 +LanguageModel_OOV -0.30024999999999141 +Glue -0.26989999999999842 +PhraseModel_2 0.92000295209089566 +PhraseModel_1 0.67450748692470841 +PhraseModel_4 -0.5920000014976784 +PhraseModel_3 -0.36402437203127397 +PhraseModel_6 -0.28754999999999603 +PhraseModel_0 -0.32076244202907672 +PassThrough -0.33284999999999004 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1 b/training/dtrain/examples/parallelized/work/weights.1 new file mode 100644 index 00000000..03058a16 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1 @@ -0,0 +1,12 @@ +PhraseModel_2 0.8365578543552836 +PhraseModel_4 -0.5900840266009169 +PhraseModel_1 0.5312000609786991 +PhraseModel_0 -0.3872342271319619 +PhraseModel_3 -0.3728279676912084 +Glue -0.2938500000000036 +PhraseModel_6 -0.2803499999999967 +PassThrough -0.25014999999999626 +LanguageModel_OOV -0.21754999999999702 +LanguageModel 0.07306061161169894 +WordPenalty 0.09576193325966899 +PhraseModel_5 -0.026900000000000257 diff --git a/training/dtrain/examples/parallelized/work/weights.1.0 b/training/dtrain/examples/parallelized/work/weights.1.0 new file mode 100644 index 00000000..6a6a65c1 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1.0 @@ -0,0 +1,11 @@ +WordPenalty 0.20064405063930751 +LanguageModel 0.9530439901597807 +LanguageModel_OOV -0.26400000000000112 +Glue -0.38150000000000084 +PhraseModel_0 -0.22362384322085468 +PhraseModel_1 0.12253609968953538 +PhraseModel_2 0.26328345736266612 +PhraseModel_3 0.38018406503151553 +PhraseModel_4 -0.48654149460854373 +PhraseModel_6 -0.36449999999999722 +PassThrough -0.22160000000000085 diff --git a/training/dtrain/examples/parallelized/work/weights.1.1 b/training/dtrain/examples/parallelized/work/weights.1.1 new file mode 100644 index 00000000..f56ea4a2 --- /dev/null +++ b/training/dtrain/examples/parallelized/work/weights.1.1 @@ -0,0 +1,12 @@ +WordPenalty 0.1109188106780935 +LanguageModel 0.17269294375442074 +LanguageModel_OOV -0.13485000000000266 +Glue -0.3178000000000088 +PhraseModel_2 0.75311275661967159 +PhraseModel_1 0.38789263503268989 +PhraseModel_4 -0.58816805170415531 +PhraseModel_3 -0.38163156335114284 +PhraseModel_6 -0.27314999999999739 +PhraseModel_0 -0.45370601223484697 +PassThrough -0.16745000000000249 +PhraseModel_5 -0.026900000000000257 -- cgit v1.2.3