dtrain: multi-reference BLEU

author: Patrick Simianer <p@simianer.de> 2015-01-23 15:50:27 +0100
committer: Patrick Simianer <p@simianer.de> 2015-01-23 15:50:27 +0100
commit: 32dea3f24e56ac7c17343457c48f750f16838742 (patch)
tree: 79177b58cbff08c14991a0da8e851912b1c06309 /training/dtrain/examples
parent: 556dc935c7a2d8df78a35447d20d71b4bf6e391a (diff)
9 files changed, 27 insertions, 136 deletions
diff --git a/training/dtrain/examples/standard/expected-output b/training/dtrain/examples/standard/expected-output
deleted file mode 100644
index 2460cfbb..00000000
--- a/training/dtrain/examples/standard/expected-output
+++ /dev/null
@@ -1,123 +0,0 @@
-                cdec cfg './cdec.ini'
-Loading the LM will be faster if you build a binary file.
-Reading ./nc-wmt11.en.srilm.gz
-----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
-****************************************************************************************************
-  Example feature: Shape_S00000_T00000
-T=1 I=1 D=1
-Seeding random number sequence to 2327685089
-
-dtrain
-Parameters:
-                       k 100
-                       N 4
-                       T 3
-                   batch 0
-                  scorer 'fixed_stupid_bleu'
-             sample from 'kbest'
-                  filter 'uniq'
-           learning rate 0.1
-                   gamma 0
-             loss margin 0
-       faster perceptron 1
-                   pairs 'XYX'
-                   hi lo 0.1
-          pair threshold 0
-          select weights 'avg'
-                  l1 reg 0 'none'
-                    pclr no
-               max pairs 4294967295
-                  repeat 1
-                cdec cfg './cdec.ini'
-                   input './nc-wmt11.gz'
-                  output '-'
-              stop_after 10
-(a dot represents 10 inputs)
-Iteration #1 of 3.
- . 10
-Stopping after 10 input sentences.
-WEIGHTS
-              Glue = +6.9
-       WordPenalty = -46.426
-     LanguageModel = +535.12
- LanguageModel_OOV = -123.5
-     PhraseModel_0 = -160.73
-     PhraseModel_1 = -350.13
-     PhraseModel_2 = -187.81
-     PhraseModel_3 = +172.04
-     PhraseModel_4 = +0.90108
-     PhraseModel_5 = +21.6
-     PhraseModel_6 = +67.2
-       PassThrough = -149.7
-        ---
-       1best avg score: 0.23327 (+0.23327)
- 1best avg model score: -9084.9 (-9084.9)
-           avg # pairs: 780.7
-        avg # rank err: 0 (meaningless)
-     avg # margin viol: 0
-       k-best loss imp: 100%
-    non0 feature count: 1389
-           avg list sz: 91.3
-           avg f count: 146.2
-(time 0.37 min, 2.2 s/S)
-
-Iteration #2 of 3.
- . 10
-WEIGHTS
-              Glue = -43
-       WordPenalty = -22.019
-     LanguageModel = +591.53
- LanguageModel_OOV = -252.1
-     PhraseModel_0 = -120.21
-     PhraseModel_1 = -43.589
-     PhraseModel_2 = +73.53
-     PhraseModel_3 = +113.7
-     PhraseModel_4 = -223.81
-     PhraseModel_5 = +64
-     PhraseModel_6 = +54.8
-       PassThrough = -331.1
-        ---
-       1best avg score: 0.29568 (+0.062413)
- 1best avg model score: -15879 (-6794.1)
-           avg # pairs: 566.1
-        avg # rank err: 0 (meaningless)
-     avg # margin viol: 0
-       k-best loss imp: 100%
-    non0 feature count: 1931
-           avg list sz: 91.3
-           avg f count: 139.89
-(time 0.33 min, 2 s/S)
-
-Iteration #3 of 3.
- . 10
-WEIGHTS
-              Glue = -44.3
-       WordPenalty = -131.85
-     LanguageModel = +230.91
- LanguageModel_OOV = -285.4
-     PhraseModel_0 = -194.27
-     PhraseModel_1 = -294.83
-     PhraseModel_2 = -92.043
-     PhraseModel_3 = -140.24
-     PhraseModel_4 = +85.613
-     PhraseModel_5 = +238.1
-     PhraseModel_6 = +158.7
-       PassThrough = -359.6
-        ---
-       1best avg score: 0.37375 (+0.078067)
- 1best avg model score: -14519 (+1359.7)
-           avg # pairs: 545.4
-        avg # rank err: 0 (meaningless)
-     avg # margin viol: 0
-       k-best loss imp: 100%
-    non0 feature count: 2218
-           avg list sz: 91.3
-           avg f count: 137.77
-(time 0.35 min, 2.1 s/S)
-
-Writing weights file to '-' ...
-done
-
----
-Best iteration: 3 [SCORE 'fixed_stupid_bleu'=0.37375].
-This took 1.05 min.
diff --git a/training/dtrain/examples/standard/expected-output.gz b/training/dtrain/examples/standard/expected-output.gz
new file mode 100644
index 00000000..f93a253e
--- /dev/null
+++ b/training/dtrain/examples/standard/expected-output.gz
diff --git a/training/dtrain/examples/standard/nc-wmt11.de.gz b/training/dtrain/examples/standard/nc-wmt11.de.gz
deleted file mode 100644
index 0741fd92..00000000
--- a/training/dtrain/examples/standard/nc-wmt11.de.gz
+++ /dev/null
diff --git a/training/dtrain/examples/standard/nc-wmt11.en.gz b/training/dtrain/examples/standard/nc-wmt11.en.gz
deleted file mode 100644
index 1c0bd401..00000000
--- a/training/dtrain/examples/standard/nc-wmt11.en.gz
+++ /dev/null
diff --git a/training/dtrain/examples/toy/dtrain.ini b/training/dtrain/examples/toy/dtrain.ini
index ef956df7..70c7331c 100644
--- a/training/dtrain/examples/toy/dtrain.ini
+++ b/training/dtrain/examples/toy/dtrain.ini
@@ -1,6 +1,5 @@
 decoder_config=cdec.ini
-input=src
-refs=tgt
+bitext=in
 output=-
 print_weights=logp shell_rule house_rule small_rule little_rule PassThrough PassThrough_1 PassThrough_2 PassThrough_3 PassThrough_4 PassThrough_5 PassThrough_6
 k=4
diff --git a/training/dtrain/examples/toy/expected-output b/training/dtrain/examples/toy/expected-output
index 1da2aadd..fbee24e3 100644
--- a/training/dtrain/examples/toy/expected-output
+++ b/training/dtrain/examples/toy/expected-output
@@ -1,26 +1,29 @@
 Warning: hi_lo only works with pair_sampling XYX.
                 cdec cfg 'cdec.ini'
-Seeding random number sequence to 1664825829
+Seeding random number sequence to 3626026233
 
 dtrain
 Parameters:
                        k 4
                        N 4
                        T 2
+                   batch 0
                   scorer 'bleu'
              sample from 'kbest'
                   filter 'uniq'
            learning rate 1
                    gamma 0
              loss margin 0
+       faster perceptron 1
                    pairs 'all'
           pair threshold 0
           select weights 'last'
                   l1 reg 0 'none'
+                    pclr no
                max pairs 4294967295
+                  repeat 1
                 cdec cfg 'cdec.ini'
-                   input 'src'
-                    refs 'tgt'
+                   input ''
                   output '-'
 (a dot represents 10 inputs)
 Iteration #1 of 2.
@@ -32,12 +35,19 @@ WEIGHTS
         small_rule = -2
        little_rule = +3
        PassThrough = -5
+     PassThrough_1 = +0
+     PassThrough_2 = +0
+     PassThrough_3 = +0
+     PassThrough_4 = +0
+     PassThrough_5 = +0
+     PassThrough_6 = +0
         ---
        1best avg score: 0.5 (+0.5)
  1best avg model score: 2.5 (+2.5)
-           avg # pairs: 4
-        avg # rank err: 1.5
+           avg # pairs: 1.5
+        avg # rank err: 1.5 (meaningless)
      avg # margin viol: 0
+       k-best loss imp: 100%
     non0 feature count: 6
            avg list sz: 4
            avg f count: 2.875
@@ -52,12 +62,19 @@ WEIGHTS
         small_rule = -2
        little_rule = +3
        PassThrough = -5
+     PassThrough_1 = +0
+     PassThrough_2 = +0
+     PassThrough_3 = +0
+     PassThrough_4 = +0
+     PassThrough_5 = +0
+     PassThrough_6 = +0
         ---
        1best avg score: 1 (+0.5)
  1best avg model score: 5 (+2.5)
-           avg # pairs: 5
-        avg # rank err: 0
+           avg # pairs: 0
+        avg # rank err: 0 (meaningless)
      avg # margin viol: 0
+       k-best loss imp: 100%
     non0 feature count: 6
            avg list sz: 4
            avg f count: 3
diff --git a/training/dtrain/examples/toy/in b/training/dtrain/examples/toy/in
new file mode 100644
index 00000000..5d70795d
--- /dev/null
+++ b/training/dtrain/examples/toy/in
@@ -0,0 +1,2 @@
+ich sah ein kleines haus ||| i saw a little house
+ich fand ein kleines haus ||| i found a little house
diff --git a/training/dtrain/examples/toy/src b/training/dtrain/examples/toy/src
deleted file mode 100644
index 87e39ef2..00000000
--- a/training/dtrain/examples/toy/src
+++ /dev/null
@@ -1,2 +0,0 @@
-ich sah ein kleines haus
-ich fand ein kleines haus
diff --git a/training/dtrain/examples/toy/tgt b/training/dtrain/examples/toy/tgt
deleted file mode 100644
index 174926b3..00000000
--- a/training/dtrain/examples/toy/tgt
+++ /dev/null
@@ -1,2 +0,0 @@
-i saw a little house
-i found a little house
author	Patrick Simianer <p@simianer.de>	2015-01-23 15:50:27 +0100
committer	Patrick Simianer <p@simianer.de>	2015-01-23 15:50:27 +0100
commit	32dea3f24e56ac7c17343457c48f750f16838742 (patch)
tree	79177b58cbff08c14991a0da8e851912b1c06309 /training/dtrain/examples
parent	556dc935c7a2d8df78a35447d20d71b4bf6e391a (diff)