summaryrefslogtreecommitdiff
path: root/dtrain/test
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-04-26 21:39:11 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2012-04-26 21:39:11 +0200
commit01110e92e7429df7882879e026b28aa9c89c724d (patch)
treef5e03f63c8ae907696582aaa66953cc5cd911610 /dtrain/test
parent28806638345e60bd442bf5fa8e7471f9115b0296 (diff)
made pair sampling configurable
Diffstat (limited to 'dtrain/test')
-rw-r--r-- dtrain/test/example/README 6
-rw-r--r-- dtrain/test/example/dtrain.ini 9
2 files changed, 11 insertions, 4 deletions
diff --git a/dtrain/test/example/README b/dtrain/test/example/README
new file mode 100644
index 00000000..e5a5de59
--- /dev/null
+++ b/dtrain/test/example/README
@@ -0,0 +1,6 @@
+Small example of input format for distributed training.
+Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini .
+
+For this to work, comment out '#define DTRAIN_LOCAL' in dtrain.h
+and recompile.
+
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index b59250f3..cd2c75e7 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,5 +1,5 @@
input=test/example/nc-wmt11.1k.gz # use '-' for STDIN
-output=weights.gz # a weights file (add .gz for gzip compression) or STDOUT '-'
+output=- # a weights file (add .gz for gzip compression) or STDOUT '-'
decoder_config=test/example/cdec.ini # config for cdec
# weights for these features will be printed on each iteration
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough
@@ -10,11 +10,12 @@ stop_after=100 # stop epoch after 100 inputs
epochs=3 # run over input 3 times
k=100 # use 100best lists
N=4 # optimize (approx) BLEU4
-scorer=stupid_bleu # use 'stupid' BLEU+1
+scorer=approx_bleu # use approximate BLEU
learning_rate=0.0001 # learning rate
gamma=0 # use SVM reg
sample_from=kbest # use kbest lists (as opposed to forest)
filter=uniq # only unique entries in kbest (surface form)
-pair_sampling=108010 # 10 vs 80 vs 10 and 80 vs 10
+pair_sampling=XYX
+hi_lo=0.1 # 10 vs 80 vs 10 and 80 vs 10
pair_threshold=0 # minimum distance in BLEU (this will still only use pairs with diff > 0)
-select_weights=last # just output last weights
+select_weights=VOID # don't output weights