summaryrefslogtreecommitdiff
path: root/training/dtrain/test/example
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-18 13:35:42 -0500
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-18 13:35:42 -0500
commit8aa29810bb77611cc20b7a384897ff6703783ea1 (patch)
tree8635daa8fffb3f2cd90e30b41e27f4f9e0909447 /training/dtrain/test/example
parentfbdacabc85bea65d735f2cb7f92b98e08ce72d04 (diff)
major restructure of the training code
Diffstat (limited to 'training/dtrain/test/example')
-rw-r--r--training/dtrain/test/example/README8
-rw-r--r--training/dtrain/test/example/cdec.ini25
-rw-r--r--training/dtrain/test/example/dtrain.ini22
-rw-r--r--training/dtrain/test/example/expected-output89
4 files changed, 144 insertions, 0 deletions
diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README
new file mode 100644
index 00000000..6937b11b
--- /dev/null
+++ b/training/dtrain/test/example/README
@@ -0,0 +1,8 @@
+Small example of input format for distributed training.
+Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini .
+
+For this to work, undef 'DTRAIN_LOCAL' in dtrain.h
+and recompile.
+
+Data is here: http://simianer.de/#dtrain
+
diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini
new file mode 100644
index 00000000..d5955f0e
--- /dev/null
+++ b/training/dtrain/test/example/cdec.ini
@@ -0,0 +1,25 @@
+formalism=scfg
+add_pass_through_rules=true
+scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=30
+feature_function=WordPenalty
+feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz
+# all currently working feature functions for translation:
+# (with those features active that were used in the ACL paper)
+#feature_function=ArityPenalty
+#feature_function=CMR2008ReorderingFeatures
+#feature_function=Dwarf
+#feature_function=InputIndicator
+#feature_function=LexNullJump
+#feature_function=NewJump
+#feature_function=NgramFeatures
+#feature_function=NonLatinCount
+#feature_function=OutputIndicator
+feature_function=RuleIdentityFeatures
+feature_function=RuleSourceBigramFeatures
+feature_function=RuleTargetBigramFeatures
+feature_function=RuleShape
+#feature_function=SourceSpanSizeFeatures
+#feature_function=SourceWordPenalty
+#feature_function=SpanFeatures
diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini
new file mode 100644
index 00000000..72d50ca1
--- /dev/null
+++ b/training/dtrain/test/example/dtrain.ini
@@ -0,0 +1,22 @@
+input=test/example/nc-wmt11.1k.gz # use '-' for STDIN
+output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
+select_weights=VOID # don't output weights
+decoder_config=test/example/cdec.ini # config for cdec
+# weights for these features will be printed on each iteration
+print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
+tmp=/tmp
+stop_after=10 # stop epoch after 10 inputs
+
+# interesting stuff
+epochs=2 # run over input 2 times
+k=100 # use 100best lists
+N=4 # optimize (approx) BLEU4
+scorer=stupid_bleu # use 'stupid' BLEU+1
+learning_rate=1.0 # learning rate, don't care if gamma=0 (perceptron)
+gamma=0 # use SVM reg
+sample_from=kbest # use kbest lists (as opposed to forest)
+filter=uniq # only unique entries in kbest (surface form)
+pair_sampling=XYX
+hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10 here
+pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0)
+loss_margin=0
diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output
new file mode 100644
index 00000000..05326763
--- /dev/null
+++ b/training/dtrain/test/example/expected-output
@@ -0,0 +1,89 @@
+ cdec cfg 'test/example/cdec.ini'
+Loading the LM will be faster if you build a binary file.
+Reading test/example/nc-wmt11.en.srilm.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+ Example feature: Shape_S00000_T00000
+Seeding random number sequence to 2912000813
+
+dtrain
+Parameters:
+ k 100
+ N 4
+ T 2
+ scorer 'stupid_bleu'
+ sample from 'kbest'
+ filter 'uniq'
+ learning rate 1
+ gamma 0
+ loss margin 0
+ pairs 'XYX'
+ hi lo 0.1
+ pair threshold 0
+ select weights 'VOID'
+ l1 reg 0 'none'
+ max pairs 4294967295
+ cdec cfg 'test/example/cdec.ini'
+ input 'test/example/nc-wmt11.1k.gz'
+ output '-'
+ stop_after 10
+(a dot represents 10 inputs)
+Iteration #1 of 2.
+ . 10
+Stopping after 10 input sentences.
+WEIGHTS
+ Glue = -637
+ WordPenalty = +1064
+ LanguageModel = +1175.3
+ LanguageModel_OOV = -1437
+ PhraseModel_0 = +1935.6
+ PhraseModel_1 = +2499.3
+ PhraseModel_2 = +964.96
+ PhraseModel_3 = +1410.8
+ PhraseModel_4 = -5977.9
+ PhraseModel_5 = +522
+ PhraseModel_6 = +1089
+ PassThrough = -1308
+ ---
+ 1best avg score: 0.16963 (+0.16963)
+ 1best avg model score: 64485 (+64485)
+ avg # pairs: 1494.4
+ avg # rank err: 702.6
+ avg # margin viol: 0
+ non0 feature count: 528
+ avg list sz: 85.7
+ avg f count: 102.75
+(time 0.083 min, 0.5 s/S)
+
+Iteration #2 of 2.
+ . 10
+WEIGHTS
+ Glue = -1196
+ WordPenalty = +809.52
+ LanguageModel = +3112.1
+ LanguageModel_OOV = -1464
+ PhraseModel_0 = +3895.5
+ PhraseModel_1 = +4683.4
+ PhraseModel_2 = +1092.8
+ PhraseModel_3 = +1079.6
+ PhraseModel_4 = -6827.7
+ PhraseModel_5 = -888
+ PhraseModel_6 = +142
+ PassThrough = -1335
+ ---
+ 1best avg score: 0.277 (+0.10736)
+ 1best avg model score: -3110.5 (-67595)
+ avg # pairs: 1144.2
+ avg # rank err: 529.1
+ avg # margin viol: 0
+ non0 feature count: 859
+ avg list sz: 74.9
+ avg f count: 112.84
+(time 0.067 min, 0.4 s/S)
+
+Writing weights file to '-' ...
+done
+
+---
+Best iteration: 2 [SCORE 'stupid_bleu'=0.277].
+This took 0.15 min.