diff options
author | Patrick Simianer <p@simianer.de> | 2015-01-23 15:50:27 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2015-01-23 15:50:27 +0100 |
commit | 32dea3f24e56ac7c17343457c48f750f16838742 (patch) | |
tree | 79177b58cbff08c14991a0da8e851912b1c06309 /training/dtrain/examples | |
parent | 556dc935c7a2d8df78a35447d20d71b4bf6e391a (diff) |
dtrain: multi-reference BLEU
Diffstat (limited to 'training/dtrain/examples')
-rw-r--r-- | training/dtrain/examples/standard/expected-output | 123 | ||||
-rw-r--r-- | training/dtrain/examples/standard/expected-output.gz | bin | 0 -> 625304 bytes | |||
-rw-r--r-- | training/dtrain/examples/standard/nc-wmt11.de.gz | bin | 58324 -> 0 bytes | |||
-rw-r--r-- | training/dtrain/examples/standard/nc-wmt11.en.gz | bin | 49600 -> 0 bytes | |||
-rw-r--r-- | training/dtrain/examples/toy/dtrain.ini | 3 | ||||
-rw-r--r-- | training/dtrain/examples/toy/expected-output | 31 | ||||
-rw-r--r-- | training/dtrain/examples/toy/in | 2 | ||||
-rw-r--r-- | training/dtrain/examples/toy/src | 2 | ||||
-rw-r--r-- | training/dtrain/examples/toy/tgt | 2 |
9 files changed, 27 insertions, 136 deletions
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output deleted file mode 100644 index 2460cfbb..00000000 --- a/training/dtrain/examples/standard/expected-output +++ /dev/null @@ -1,123 +0,0 @@ - cdec cfg './cdec.ini' -Loading the LM will be faster if you build a binary file. -Reading ./nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** - Example feature: Shape_S00000_T00000 -T=1 I=1 D=1 -Seeding random number sequence to 2327685089 - -dtrain -Parameters: - k 100 - N 4 - T 3 - batch 0 - scorer 'fixed_stupid_bleu' - sample from 'kbest' - filter 'uniq' - learning rate 0.1 - gamma 0 - loss margin 0 - faster perceptron 1 - pairs 'XYX' - hi lo 0.1 - pair threshold 0 - select weights 'avg' - l1 reg 0 'none' - pclr no - max pairs 4294967295 - repeat 1 - cdec cfg './cdec.ini' - input './nc-wmt11.gz' - output '-' - stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 3. - . 10 -Stopping after 10 input sentences. -WEIGHTS - Glue = +6.9 - WordPenalty = -46.426 - LanguageModel = +535.12 - LanguageModel_OOV = -123.5 - PhraseModel_0 = -160.73 - PhraseModel_1 = -350.13 - PhraseModel_2 = -187.81 - PhraseModel_3 = +172.04 - PhraseModel_4 = +0.90108 - PhraseModel_5 = +21.6 - PhraseModel_6 = +67.2 - PassThrough = -149.7 - --- - 1best avg score: 0.23327 (+0.23327) - 1best avg model score: -9084.9 (-9084.9) - avg # pairs: 780.7 - avg # rank err: 0 (meaningless) - avg # margin viol: 0 - k-best loss imp: 100% - non0 feature count: 1389 - avg list sz: 91.3 - avg f count: 146.2 -(time 0.37 min, 2.2 s/S) - -Iteration #2 of 3. - . 
10 -WEIGHTS - Glue = -43 - WordPenalty = -22.019 - LanguageModel = +591.53 - LanguageModel_OOV = -252.1 - PhraseModel_0 = -120.21 - PhraseModel_1 = -43.589 - PhraseModel_2 = +73.53 - PhraseModel_3 = +113.7 - PhraseModel_4 = -223.81 - PhraseModel_5 = +64 - PhraseModel_6 = +54.8 - PassThrough = -331.1 - --- - 1best avg score: 0.29568 (+0.062413) - 1best avg model score: -15879 (-6794.1) - avg # pairs: 566.1 - avg # rank err: 0 (meaningless) - avg # margin viol: 0 - k-best loss imp: 100% - non0 feature count: 1931 - avg list sz: 91.3 - avg f count: 139.89 -(time 0.33 min, 2 s/S) - -Iteration #3 of 3. - . 10 -WEIGHTS - Glue = -44.3 - WordPenalty = -131.85 - LanguageModel = +230.91 - LanguageModel_OOV = -285.4 - PhraseModel_0 = -194.27 - PhraseModel_1 = -294.83 - PhraseModel_2 = -92.043 - PhraseModel_3 = -140.24 - PhraseModel_4 = +85.613 - PhraseModel_5 = +238.1 - PhraseModel_6 = +158.7 - PassThrough = -359.6 - --- - 1best avg score: 0.37375 (+0.078067) - 1best avg model score: -14519 (+1359.7) - avg # pairs: 545.4 - avg # rank err: 0 (meaningless) - avg # margin viol: 0 - k-best loss imp: 100% - non0 feature count: 2218 - avg list sz: 91.3 - avg f count: 137.77 -(time 0.35 min, 2.1 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375]. -This took 1.05 min. 
diff --git a/training/dtrain/examples/standard/expected-output.gz b/training/dtrain/examples/standard/expected-output.gz Binary files differ new file mode 100644 index 00000000..f93a253e --- /dev/null +++ b/training/dtrain/examples/standard/expected-output.gz diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz Binary files differ deleted file mode 100644 index 0741fd92..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.de.gz +++ /dev/null diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz Binary files differ deleted file mode 100644 index 1c0bd401..00000000 --- a/training/dtrain/examples/standard/nc-wmt11.en.gz +++ /dev/null diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini index ef956df7..70c7331c 100644 --- a/training/dtrain/examples/toy/dtrain.ini +++ b/training/dtrain/examples/toy/dtrain.ini @@ -1,6 +1,5 @@ decoder_config=cdec.ini -input=src -refs=tgt +bitext=in output=- print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6 k=4 diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output index 1da2aadd..fbee24e3 100644 --- a/training/dtrain/examples/toy/expected-output +++ b/training/dtrain/examples/toy/expected-output @@ -1,26 +1,29 @@ Warning: hi_lo only works with pair_sampling XYX. 
cdec cfg 'cdec.ini' -Seeding random number sequence to 1664825829 +Seeding random number sequence to 3626026233 dtrain Parameters: k 4 N 4 T 2 + batch 0 scorer 'bleu' sample from 'kbest' filter 'uniq' learning rate 1 gamma 0 loss margin 0 + faster perceptron 1 pairs 'all' pair threshold 0 select weights 'last' l1 reg 0 'none' + pclr no max pairs 4294967295 + repeat 1 cdec cfg 'cdec.ini' - input 'src' - refs 'tgt' + input '' output '-' (a dot represents 10 inputs) Iteration #1 of 2. @@ -32,12 +35,19 @@ WEIGHTS small_rule = -2 little_rule = +3 PassThrough = -5 + PassThrough_1 = +0 + PassThrough_2 = +0 + PassThrough_3 = +0 + PassThrough_4 = +0 + PassThrough_5 = +0 + PassThrough_6 = +0 --- 1best avg score: 0.5 (+0.5) 1best avg model score: 2.5 (+2.5) - avg # pairs: 4 - avg # rank err: 1.5 + avg # pairs: 1.5 + avg # rank err: 1.5 (meaningless) avg # margin viol: 0 + k-best loss imp: 100% non0 feature count: 6 avg list sz: 4 avg f count: 2.875 @@ -52,12 +62,19 @@ WEIGHTS small_rule = -2 little_rule = +3 PassThrough = -5 + PassThrough_1 = +0 + PassThrough_2 = +0 + PassThrough_3 = +0 + PassThrough_4 = +0 + PassThrough_5 = +0 + PassThrough_6 = +0 --- 1best avg score: 1 (+0.5) 1best avg model score: 5 (+2.5) - avg # pairs: 5 - avg # rank err: 0 + avg # pairs: 0 + avg # rank err: 0 (meaningless) avg # margin viol: 0 + k-best loss imp: 100% non0 feature count: 6 avg list sz: 4 avg f count: 3 diff --git a/training/dtrain/examples/toy/in b/training/dtrain/examples/toy/in new file mode 100644 index 00000000..5d70795d --- /dev/null +++ b/training/dtrain/examples/toy/in @@ -0,0 +1,2 @@ +ich sah ein kleines haus ||| i saw a little house +ich fand ein kleines haus ||| i found a little house diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src deleted file mode 100644 index 87e39ef2..00000000 --- a/training/dtrain/examples/toy/src +++ /dev/null @@ -1,2 +0,0 @@ -ich sah ein kleines haus -ich fand ein kleines haus diff --git a/training/dtrain/examples/toy/tgt 
b/training/dtrain/examples/toy/tgt deleted file mode 100644 index 174926b3..00000000 --- a/training/dtrain/examples/toy/tgt +++ /dev/null @@ -1,2 +0,0 @@ -i saw a little house -i found a little house |