diff options
author | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
---|---|---|
committer | Avneesh Saluja <asaluja@gmail.com> | 2013-03-28 18:28:16 -0700 |
commit | 5b8253e0e1f1393a509fb9975ba8c1347af758ed (patch) | |
tree | 1790470b1d07a0b4973ebce19192e896566ea60b /dtrain/test | |
parent | 2389a5a8a43dda87c355579838559515b0428421 (diff) | |
parent | b203f8c5dc8cff1b9c9c2073832b248fcad0765a (diff) |
fixed conflicts
Diffstat (limited to 'dtrain/test')
-rw-r--r-- | dtrain/test/example/README | 8 | ||||
-rw-r--r-- | dtrain/test/example/cdec.ini | 24 | ||||
-rw-r--r-- | dtrain/test/example/dtrain.ini | 22 | ||||
-rw-r--r-- | dtrain/test/example/expected-output | 125 | ||||
-rw-r--r-- | dtrain/test/toy/cdec.ini | 2 | ||||
-rw-r--r-- | dtrain/test/toy/dtrain.ini | 12 | ||||
-rw-r--r-- | dtrain/test/toy/input | 2 |
7 files changed, 0 insertions, 195 deletions
diff --git a/dtrain/test/example/README b/dtrain/test/example/README deleted file mode 100644 index 6937b11b..00000000 --- a/dtrain/test/example/README +++ /dev/null @@ -1,8 +0,0 @@ -Small example of input format for distributed training. -Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . - -For this to work, undef 'DTRAIN_LOCAL' in dtrain.h -and recompile. - -Data is here: http://simianer.de/#dtrain - diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini deleted file mode 100644 index 6642107f..00000000 --- a/dtrain/test/example/cdec.ini +++ /dev/null @@ -1,24 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=30 -feature_function=WordPenalty -feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleNgramFeatures -feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini deleted file mode 100644 index c8ac7c3f..00000000 --- a/dtrain/test/example/dtrain.ini +++ /dev/null @@ -1,22 +0,0 @@ -input=test/example/nc-wmt11.1k.gz # use '-' for STDIN -output=- # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID # don't output weights -decoder_config=test/example/cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=3 # run over input 3 times -k=100 # use 100best lists -N=4 # optimize (approx) BLEU4 -scorer=stupid_bleu # use 'stupid' BLEU+1 -learning_rate=0.0001 # learning rate -gamma=0 # use SVM reg -sample_from=kbest # use kbest lists (as opposed to forest) -filter=uniq # only unique entries in kbest (surface form) -pair_sampling=XYX -hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0) -loss_margin=0 diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output deleted file mode 100644 index 25d2c069..00000000 --- a/dtrain/test/example/expected-output +++ /dev/null @@ -1,125 +0,0 @@ - cdec cfg 'test/example/cdec.ini' -feature: WordPenalty (no config parameters) -State is 0 bytes for feature WordPenalty -feature: KLanguageModel (with config parameters 'test/example/nc-wmt11.en.srilm.gz') -Loading the LM will be faster if you build a binary file. -Reading test/example/nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** -Loaded 5-gram KLM from test/example/nc-wmt11.en.srilm.gz (MapSize=49581) -State is 98 bytes for feature KLanguageModel test/example/nc-wmt11.en.srilm.gz -feature: RuleIdentityFeatures (no config parameters) -State is 0 bytes for feature RuleIdentityFeatures -feature: RuleNgramFeatures (no config parameters) -State is 0 bytes for feature RuleNgramFeatures -feature: RuleShape (no config parameters) - Example feature: Shape_S00000_T00000 -State is 0 bytes for feature RuleShape -Seeding random number sequence to 1072059181 - -dtrain -Parameters: - k 100 - N 4 - T 3 - scorer 'stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.0001 - gamma 0 - loss margin 0 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'VOID' - l1 reg 0 'none' - cdec cfg 'test/example/cdec.ini' - input 'test/example/nc-wmt11.1k.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 3. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = -0.0293 - WordPenalty = +0.049075 - LanguageModel = +0.24345 - LanguageModel_OOV = -0.2029 - PhraseModel_0 = +0.0084102 - PhraseModel_1 = +0.021729 - PhraseModel_2 = +0.014922 - PhraseModel_3 = +0.104 - PhraseModel_4 = -0.14308 - PhraseModel_5 = +0.0247 - PhraseModel_6 = -0.012 - PassThrough = -0.2161 - --- - 1best avg score: 0.16872 (+0.16872) - 1best avg model score: -1.8276 (-1.8276) - avg # pairs: 1121.1 - avg # rank err: 555.6 - avg # margin viol: 0 - non0 feature count: 277 - avg list sz: 77.2 - avg f count: 90.96 -(time 0.1 min, 0.6 s/S) - -Iteration #2 of 3. - . 10 -WEIGHTS - Glue = -0.3526 - WordPenalty = +0.067576 - LanguageModel = +1.155 - LanguageModel_OOV = -0.2728 - PhraseModel_0 = -0.025529 - PhraseModel_1 = +0.095869 - PhraseModel_2 = +0.094567 - PhraseModel_3 = +0.12482 - PhraseModel_4 = -0.36533 - PhraseModel_5 = +0.1068 - PhraseModel_6 = -0.1517 - PassThrough = -0.286 - --- - 1best avg score: 0.18394 (+0.015221) - 1best avg model score: 3.205 (+5.0326) - avg # pairs: 1168.3 - avg # rank err: 594.8 - avg # margin viol: 0 - non0 feature count: 543 - avg list sz: 77.5 - avg f count: 85.916 -(time 0.083 min, 0.5 s/S) - -Iteration #3 of 3. - . 10 -WEIGHTS - Glue = -0.392 - WordPenalty = +0.071963 - LanguageModel = +0.81266 - LanguageModel_OOV = -0.4177 - PhraseModel_0 = -0.2649 - PhraseModel_1 = -0.17931 - PhraseModel_2 = +0.038261 - PhraseModel_3 = +0.20261 - PhraseModel_4 = -0.42621 - PhraseModel_5 = +0.3198 - PhraseModel_6 = -0.1437 - PassThrough = -0.4309 - --- - 1best avg score: 0.2962 (+0.11225) - 1best avg model score: -36.274 (-39.479) - avg # pairs: 1109.6 - avg # rank err: 515.9 - avg # margin viol: 0 - non0 feature count: 741 - avg list sz: 77 - avg f count: 88.982 -(time 0.083 min, 0.5 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 3 [SCORE 'stupid_bleu'=0.2962]. -This took 0.26667 min. diff --git a/dtrain/test/toy/cdec.ini b/dtrain/test/toy/cdec.ini deleted file mode 100644 index 98b02d44..00000000 --- a/dtrain/test/toy/cdec.ini +++ /dev/null @@ -1,2 +0,0 @@ -formalism=scfg -add_pass_through_rules=true diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini deleted file mode 100644 index a091732f..00000000 --- a/dtrain/test/toy/dtrain.ini +++ /dev/null @@ -1,12 +0,0 @@ -decoder_config=test/toy/cdec.ini -input=test/toy/input -output=- -print_weights=logp shell_rule house_rule small_rule little_rule PassThrough -k=4 -N=4 -epochs=2 -scorer=bleu -sample_from=kbest -filter=uniq -pair_sampling=all -learning_rate=1 diff --git a/dtrain/test/toy/input b/dtrain/test/toy/input deleted file mode 100644 index 4d10a9ea..00000000 --- a/dtrain/test/toy/input +++ /dev/null @@ -1,2 +0,0 @@ -0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 -1 ich fand ein kleines haus i found a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 [NP] ||| ich ||| i ||| logp=0 [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 house_rule=1 [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 shell_rule=1 [JJ] ||| kleines ||| small ||| logp=0 small_rule=1 [JJ] ||| kleines ||| little ||| logp=0 little_rule=1 [JJ] ||| grosses ||| big ||| logp=0 [JJ] ||| grosses ||| large ||| logp=0 [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 [V] ||| sah ||| saw ||| logp=0 [V] ||| fand ||| found ||| logp=0 |