diff options
Diffstat (limited to 'training/dtrain/test')
22 files changed, 0 insertions, 222 deletions
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README deleted file mode 100644 index 2df77086..00000000 --- a/training/dtrain/test/example/README +++ /dev/null @@ -1,8 +0,0 @@ -Small example of input format for distributed training. -Call dtrain from this folder with ../../dtrain -c test/example/dtrain.ini . - -For this to work, undef 'DTRAIN_LOCAL' in dtrain.h -and recompile. - -data can be found here: http://simianer.de/#dtrain - diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini deleted file mode 100644 index 0215416d..00000000 --- a/training/dtrain/test/example/cdec.ini +++ /dev/null @@ -1,25 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -feature_function=WordPenalty -feature_function=KLanguageModel ./nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini deleted file mode 100644 index 97fce7f0..00000000 --- a/training/dtrain/test/example/dtrain.ini +++ /dev/null @@ -1,22 +0,0 @@ -input=./nc-wmt11.1k.gz # use '-' for STDIN -output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights -decoder_config=./cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=2 # run over input 2 times -k=100 # use 100best lists -N=4 # optimize (approx) BLEU4 -scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron) -gamma=0 # use SVM reg -sample_from=kbest # use kbest lists (as opposed to forest) -filter=uniq # only unique entries in kbest (surface form) -pair_sampling=XYX -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0) -loss_margin=0 diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output deleted file mode 100644 index 05326763..00000000 --- a/training/dtrain/test/example/expected-output +++ /dev/null @@ -1,89 +0,0 @@ - cdec cfg 'test/example/cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading test/example/nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** - Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2912000813 - -dtrain -Parameters: - k 100 - N 4 - T 2 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 1 - gamma 0 - loss margin 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'VOID' - l1 reg 0 'none' - max pairs 4294967295 - cdec cfg 'test/example/cdec.ini' - input 'test/example/nc-wmt11.1k.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 2. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = -637 - WordPenalty = +1064 - LanguageModel = +1175.3 - LanguageModel_OOV = -1437 - PhraseModel_0 = +1935.6 - PhraseModel_1 = +2499.3 - PhraseModel_2 = +964.96 - PhraseModel_3 = +1410.8 - PhraseModel_4 = -5977.9 - PhraseModel_5 = +522 - PhraseModel_6 = +1089 - PassThrough = -1308 - --- - 1best avg score: 0.16963 (+0.16963) - 1best avg model score: 64485 (+64485) - avg # pairs: 1494.4 - avg # rank err: 702.6 - avg # margin viol: 0 - non0 feature count: 528 - avg list sz: 85.7 - avg f count: 102.75 -(time 0.083 min, 0.5 s/S) - -Iteration #2 of 2. - . 10 -WEIGHTS - Glue = -1196 - WordPenalty = +809.52 - LanguageModel = +3112.1 - LanguageModel_OOV = -1464 - PhraseModel_0 = +3895.5 - PhraseModel_1 = +4683.4 - PhraseModel_2 = +1092.8 - PhraseModel_3 = +1079.6 - PhraseModel_4 = -6827.7 - PhraseModel_5 = -888 - PhraseModel_6 = +142 - PassThrough = -1335 - --- - 1best avg score: 0.277 (+0.10736) - 1best avg model score: -3110.5 (-67595) - avg # pairs: 1144.2 - avg # rank err: 529.1 - avg # margin viol: 0 - non0 feature count: 859 - avg list sz: 74.9 - avg f count: 112.84 -(time 0.067 min, 0.4 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. -This took 0.15 min. diff --git a/training/dtrain/test/parallelize/README b/training/dtrain/test/parallelize/README deleted file mode 100644 index 89715105..00000000 --- a/training/dtrain/test/parallelize/README +++ /dev/null @@ -1,5 +0,0 @@ -run for example - ../../parallelize.rb ./dtrain.ini 4 false 2 2 ./in ./refs - -final weights will be in the file work/weights.3 - diff --git a/training/dtrain/test/parallelize/cdec.ini b/training/dtrain/test/parallelize/cdec.ini deleted file mode 100644 index e43ba1c4..00000000 --- a/training/dtrain/test/parallelize/cdec.ini +++ /dev/null @@ -1,22 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -intersection_strategy=cube_pruning -cubepruning_pop_limit=200 -scfg_max_span_limit=15 -feature_function=WordPenalty -feature_function=KLanguageModel ../example/nc-wmt11.en.srilm.gz -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/test/parallelize/dtrain.ini b/training/dtrain/test/parallelize/dtrain.ini deleted file mode 100644 index 03f9d240..00000000 --- a/training/dtrain/test/parallelize/dtrain.ini +++ /dev/null @@ -1,15 +0,0 @@ -k=100 -N=4 -learning_rate=0.0001 -gamma=0 -loss_margin=0 -epochs=1 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=XYX -hi_lo=0.1 -select_weights=last -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -decoder_config=cdec.ini diff --git a/training/dtrain/test/parallelize/g/grammar.out.0.gz b/training/dtrain/test/parallelize/g/grammar.out.0.gz Binary files differdeleted file mode 100644 index 1e28a24b..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.0.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.1.gz b/training/dtrain/test/parallelize/g/grammar.out.1.gz Binary files differdeleted file mode 100644 index 372f5675..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.1.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.2.gz b/training/dtrain/test/parallelize/g/grammar.out.2.gz Binary files differdeleted file mode 100644 index 145d0dc0..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.2.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.3.gz b/training/dtrain/test/parallelize/g/grammar.out.3.gz Binary files differdeleted file mode 100644 index 105593ff..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.3.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.4.gz b/training/dtrain/test/parallelize/g/grammar.out.4.gz Binary files differdeleted file mode 100644 index 30781f48..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.4.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.5.gz b/training/dtrain/test/parallelize/g/grammar.out.5.gz Binary files differdeleted file mode 100644 index 834ee759..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.5.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.6.gz b/training/dtrain/test/parallelize/g/grammar.out.6.gz Binary files differdeleted file mode 100644 index 2e76f348..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.6.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.7.gz b/training/dtrain/test/parallelize/g/grammar.out.7.gz Binary files differdeleted file mode 100644 index 3741a887..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.7.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.8.gz b/training/dtrain/test/parallelize/g/grammar.out.8.gz Binary files differdeleted file mode 100644 index ebf6bd0c..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.8.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/g/grammar.out.9.gz b/training/dtrain/test/parallelize/g/grammar.out.9.gz Binary files differdeleted file mode 100644 index c1791059..00000000 --- a/training/dtrain/test/parallelize/g/grammar.out.9.gz +++ /dev/null diff --git a/training/dtrain/test/parallelize/in b/training/dtrain/test/parallelize/in deleted file mode 100644 index 3b7dec39..00000000 --- a/training/dtrain/test/parallelize/in +++ /dev/null @@ -1,10 +0,0 @@ -<seg grammar="g/grammar.out.0.gz" id="0">europas nach rassen geteiltes haus</seg> -<seg grammar="g/grammar.out.1.gz" id="1">ein gemeinsames merkmal aller extremen rechten in europa ist ihr rassismus und die tatsache , daß sie das einwanderungsproblem als politischen hebel benutzen .</seg> -<seg grammar="g/grammar.out.2.gz" id="2">der lega nord in italien , der vlaams block in den niederlanden , die anhänger von le pens nationaler front in frankreich , sind beispiele für parteien oder bewegungen , die sich um das gemeinsame thema : ablehnung der zuwanderung gebildet haben und um forderung nach einer vereinfachten politik , um sie zu regeln .</seg> -<seg grammar="g/grammar.out.3.gz" id="3">während individuen wie jörg haidar und jean @-@ marie le pen kommen und ( leider nicht zu bald ) wieder gehen mögen , wird die rassenfrage aus der europäischer politik nicht so bald verschwinden .</seg> -<seg grammar="g/grammar.out.4.gz" id="4">eine alternde einheimische bevölkerung und immer offenere grenzen vermehren die rassistische zersplitterung in den europäischen ländern .</seg> -<seg grammar="g/grammar.out.5.gz" id="5">die großen parteien der rechten und der linken mitte haben sich dem problem gestellt , in dem sie den kopf in den sand gesteckt und allen aussichten zuwider gehofft haben , es möge bald verschwinden .</seg> -<seg grammar="g/grammar.out.6.gz" id="6">das aber wird es nicht , wie die geschichte des rassismus in amerika deutlich zeigt .</seg> -<seg grammar="g/grammar.out.7.gz" id="7">die beziehungen zwischen den rassen standen in den usa über jahrzehnte - und tun das noch heute - im zentrum der politischen debatte . das ging so weit , daß rassentrennung genauso wichtig wie das einkommen wurde , - wenn nicht sogar noch wichtiger - um politische zuneigungen und einstellungen zu bestimmen .</seg> -<seg grammar="g/grammar.out.8.gz" id="8">der erste schritt , um mit der rassenfrage umzugehen ist , ursache und folgen rassistischer feindseligkeiten zu verstehen , auch dann , wenn das bedeutet , unangenehme tatsachen aufzudecken .</seg> -<seg grammar="g/grammar.out.9.gz" id="9">genau das haben in den usa eine große anzahl an forschungsvorhaben in wirtschaft , soziologie , psychologie und politikwissenschaft geleistet . diese forschungen zeigten , daß menschen unterschiedlicher rasse einander deutlich weniger vertrauen .</seg> diff --git a/training/dtrain/test/parallelize/refs b/training/dtrain/test/parallelize/refs deleted file mode 100644 index 632e27b0..00000000 --- a/training/dtrain/test/parallelize/refs +++ /dev/null @@ -1,10 +0,0 @@ -europe 's divided racial house -a common feature of europe 's extreme right is its racism and use of the immigration issue as a political wedge . -the lega nord in italy , the vlaams blok in the netherlands , the supporters of le pen 's national front in france , are all examples of parties or movements formed on the common theme of aversion to immigrants and promotion of simplistic policies to control them . -while individuals like jorg haidar and jean @-@ marie le pen may come and ( never to soon ) go , the race question will not disappear from european politics anytime soon . -an aging population at home and ever more open borders imply increasing racial fragmentation in european countries . -mainstream parties of the center left and center right have confronted this prospect by hiding their heads in the ground , hoping against hope that the problem will disappear . -it will not , as america 's racial history clearly shows . -race relations in the us have been for decades - and remain - at the center of political debate , to the point that racial cleavages are as important as income , if not more , as determinants of political preferences and attitudes . -the first step to address racial politics is to understand the origin and consequences of racial animosity , even if it means uncovering unpleasant truths . -this is precisely what a large amount of research in economics , sociology , psychology and political science has done for the us . diff --git a/training/dtrain/test/toy/cdec.ini b/training/dtrain/test/toy/cdec.ini deleted file mode 100644 index 98b02d44..00000000 --- a/training/dtrain/test/toy/cdec.ini +++ /dev/null @@ -1,2 +0,0 @@ -formalism=scfg -add_pass_through_rules=true diff --git a/training/dtrain/test/toy/dtrain.ini b/training/dtrain/test/toy/dtrain.ini deleted file mode 100644 index a091732f..00000000 --- a/training/dtrain/test/toy/dtrain.ini +++ /dev/null @@ -1,12 +0,0 @@ -decoder_config=test/toy/cdec.ini -input=test/toy/input -output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough -k=4 -N=4 -epochs=2 -scorer=bleu -sample_from=kbest -filter=uniq -pair_sampling=all -learning_rate=1 diff --git a/training/dtrain/test/toy/input b/training/dtrain/test/toy/input deleted file mode 100644 index 4d10a9ea..00000000 --- a/training/dtrain/test/toy/input +++ /dev/null @@ -1,2 +0,0 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 |