summaryrefslogtreecommitdiff
path: root/dtrain/test
diff options
context:
space:
mode:
authorChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-06-19 00:05:18 -0400
committerChris Dyer <cdyer@cab.ark.cs.cmu.edu>2012-06-19 00:05:18 -0400
commit5975dcaa50adb5ce7a05b83583b8f9ddc45f3f0a (patch)
tree2bc2eb4e17576e0726d7a2fa7f20eac9061c311d /dtrain/test
parent78cc819168b2a550e52e9cac06dbbed41a3b04b2 (diff)
parentee1520c5095ea8648617a3658b20eedfd4dd2007 (diff)
Merge branch 'master' of https://github.com/pks/cdec-dtrain
Diffstat (limited to 'dtrain/test')
-rw-r--r--dtrain/test/example/README4
-rw-r--r--dtrain/test/example/dtrain.ini3
-rw-r--r--dtrain/test/example/expected-output125
3 files changed, 129 insertions, 3 deletions
diff --git a/dtrain/test/example/README b/dtrain/test/example/README
index b3ea5f06..6937b11b 100644
--- a/dtrain/test/example/README
+++ b/dtrain/test/example/README
@@ -1,8 +1,8 @@
Small example of input format for distributed training.
Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini .
-For this to work, disable '#define DTRAIN_LOCAL' from dtrain.h
+For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
and recompile.
-Data is here: http://simianer.de/dtrain
+Data is here: http://simianer.de/#dtrain
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index f87ee9cf..c8ac7c3f 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -5,7 +5,7 @@ decoder_config=test/example/cdec.ini # config for cdec
# weights for these features will be printed on each iteration
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
tmp=/tmp
-stop_after=10 # stop epoch after 20 inputs
+stop_after=10 # stop epoch after 10 inputs
# interesting stuff
epochs=3 # run over input 3 times
@@ -19,3 +19,4 @@ filter=uniq # only unique entries in kbest (surface form)
pair_sampling=XYX
hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0)
+loss_margin=0
diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output
new file mode 100644
index 00000000..25d2c069
--- /dev/null
+++ b/dtrain/test/example/expected-output
@@ -0,0 +1,125 @@
+ cdec cfg 'test/example/cdec.ini'
+feature: WordPenalty (no config parameters)
+State is 0 bytes for feature WordPenalty
+feature: KLanguageModel (with config parameters 'test/example/nc-wmt11.en.srilm.gz')
+Loading the LM will be faster if you build a binary file.
+Reading test/example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+Loaded 5-gram KLM from test/example/nc-wmt11.en.srilm.gz (MapSize=49581)
+State is 98 bytes for feature KLanguageModel test/example/nc-wmt11.en.srilm.gz
+feature: RuleIdentityFeatures (no config parameters)
+State is 0 bytes for feature RuleIdentityFeatures
+feature: RuleNgramFeatures (no config parameters)
+State is 0 bytes for feature RuleNgramFeatures
+feature: RuleShape (no config parameters)
+ Example feature: Shape_S00000_T00000
+State is 0 bytes for feature RuleShape
+Seeding random number sequence to 1072059181
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 3
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 0.0001
+ gamma 0
+ loss margin 0
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'VOID'
+ l1 reg 0 'none'
+ cdec cfg 'test/example/cdec.ini'
+ input 'test/example/nc-wmt11.1k.gz'
+ output '-'
+ stop_after 10
+(a dot represents 10 inputs)
+Iteration #1 of 3.
+ . 10
+Stopping after 10 input sentences.
+WEIGHTS
+ Glue = -0.0293
+ WordPenalty = +0.049075
+ LanguageModel = +0.24345
+ LanguageModel_OOV = -0.2029
+ PhraseModel_0 = +0.0084102
+ PhraseModel_1 = +0.021729
+ PhraseModel_2 = +0.014922
+ PhraseModel_3 = +0.104
+ PhraseModel_4 = -0.14308
+ PhraseModel_5 = +0.0247
+ PhraseModel_6 = -0.012
+ PassThrough = -0.2161
+ ---
+ 1best avg score: 0.16872 (+0.16872)
+ 1best avg model score: -1.8276 (-1.8276)
+ avg # pairs: 1121.1
+ avg # rank err: 555.6
+ avg # margin viol: 0
+ non0 feature count: 277
+ avg list sz: 77.2
+ avg f count: 90.96
+(time 0.1 min, 0.6 s/S)
+
+Iteration #2 of 3.
+ . 10
+WEIGHTS
+ Glue = -0.3526
+ WordPenalty = +0.067576
+ LanguageModel = +1.155
+ LanguageModel_OOV = -0.2728
+ PhraseModel_0 = -0.025529
+ PhraseModel_1 = +0.095869
+ PhraseModel_2 = +0.094567
+ PhraseModel_3 = +0.12482
+ PhraseModel_4 = -0.36533
+ PhraseModel_5 = +0.1068
+ PhraseModel_6 = -0.1517
+ PassThrough = -0.286
+ ---
+ 1best avg score: 0.18394 (+0.015221)
+ 1best avg model score: 3.205 (+5.0326)
+ avg # pairs: 1168.3
+ avg # rank err: 594.8
+ avg # margin viol: 0
+ non0 feature count: 543
+ avg list sz: 77.5
+ avg f count: 85.916
+(time 0.083 min, 0.5 s/S)
+
+Iteration #3 of 3.
+ . 10
+WEIGHTS
+ Glue = -0.392
+ WordPenalty = +0.071963
+ LanguageModel = +0.81266
+ LanguageModel_OOV = -0.4177
+ PhraseModel_0 = -0.2649
+ PhraseModel_1 = -0.17931
+ PhraseModel_2 = +0.038261
+ PhraseModel_3 = +0.20261
+ PhraseModel_4 = -0.42621
+ PhraseModel_5 = +0.3198
+ PhraseModel_6 = -0.1437
+ PassThrough = -0.4309
+ ---
+ 1best avg score: 0.2962 (+0.11225)
+ 1best avg model score: -36.274 (-39.479)
+ avg # pairs: 1109.6
+ avg # rank err: 515.9
+ avg # margin viol: 0
+ non0 feature count: 741
+ avg list sz: 77
+ avg f count: 88.982
+(time 0.083 min, 0.5 s/S)
+
+Writing weights file to '-' ...
+done
+
+---
+Best iteration: 3 [SCORE 'stupid_bleu'=0.2962].
+This took 0.26667 min.