diff options
author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:15:46 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:15:46 +0100 |
commit | 10a232656a0c882b3b955d2bcfac138ce11e8a2e (patch) | |
tree | 134e2637908cd85b3548d68ac8590f3aad8d1c49 /dtrain/test/example | |
parent | e77078e31cd75f0e5983d332b990809a3644b0fb (diff) |
polish
Diffstat (limited to 'dtrain/test/example')
-rw-r--r-- | dtrain/test/example/cdec.ini | 8 | ||||
-rw-r--r-- | dtrain/test/example/dtrain.ini | 28 |
2 files changed, 19 insertions, 17 deletions
diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini index ad958ca6..fe5ca759 100644 --- a/dtrain/test/example/cdec.ini +++ b/dtrain/test/example/cdec.ini @@ -5,6 +5,7 @@ intersection_strategy=cube_pruning cubepruning_pop_limit=30 feature_function=WordPenalty feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: #feature_function=ArityPenalty #feature_function=CMR2008ReorderingFeatures #feature_function=Dwarf @@ -14,9 +15,10 @@ feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz #feature_function=NgramFeatures #feature_function=NonLatinCount #feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape +feature_function=RuleIdentityFeatures +feature_function=RuleNgramFeatures +feature_function=RuleShape #feature_function=SourceSpanSizeFeatures #feature_function=SourceWordPenalty #feature_function=SpanFeatures +# ^^^ features active that were used in the ACL paper diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index ed1b7e5f..68173e11 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,20 +1,20 @@ input=test/example/nc-wmt11.1k.gz # use '-' for stdin -output=w.gz # a weights file -decoder_config=test/example/cdec.ini # a ini for cdec +output=- # a weights file, or '-' for stdout +decoder_config=test/example/cdec.ini # ini for cdec # these will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=20 +stop_after=10 # stop iteration after 10 inputs # interesting stuff -epochs=1 -k=100 -N=4 -learning_rate=0.0001 -gamma=0.00001 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=108010 -pair_threshold=0.01 -select_weights=last +epochs=3 # run over input 3 
times +k=200 # use 200best lists +N=4 # optimize (approx) BLEU4 +learning_rate=0.0001 # learning rate +gamma=0.00001 # use SVM reg +scorer=stupid_bleu # use stupid BLEU+1 approx. +sample_from=kbest # use kbest lists (as opposed to forest) +filter=uniq # only unique entries in kbest +pair_sampling=108010 # 10 vs 80 vs 10 and 80 vs 10 +pair_threshold=0 # minimum distance in BLEU +select_weights=last # just output last weights |