From 10a232656a0c882b3b955d2bcfac138ce11e8a2e Mon Sep 17 00:00:00 2001 From: Patrick Simianer
Date: Tue, 13 Mar 2012 09:15:46 +0100 Subject: polish --- dtrain/test/example/cdec.ini | 8 +++++--- dtrain/test/example/dtrain.ini | 28 ++++++++++++++-------------- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'dtrain/test/example') diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini index ad958ca6..fe5ca759 100644 --- a/dtrain/test/example/cdec.ini +++ b/dtrain/test/example/cdec.ini @@ -5,6 +5,7 @@ intersection_strategy=cube_pruning cubepruning_pop_limit=30 feature_function=WordPenalty feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: #feature_function=ArityPenalty #feature_function=CMR2008ReorderingFeatures #feature_function=Dwarf @@ -14,9 +15,10 @@ feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz #feature_function=NgramFeatures #feature_function=NonLatinCount #feature_function=OutputIndicator -#feature_function=RuleIdentityFeatures -#feature_function=RuleNgramFeatures -#feature_function=RuleShape +feature_function=RuleIdentityFeatures +feature_function=RuleNgramFeatures +feature_function=RuleShape #feature_function=SourceSpanSizeFeatures #feature_function=SourceWordPenalty #feature_function=SpanFeatures +# ^^^ features active that were used in the ACL paper diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index ed1b7e5f..68173e11 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -1,20 +1,20 @@ input=test/example/nc-wmt11.1k.gz # use '-' for stdin -output=w.gz # a weights file -decoder_config=test/example/cdec.ini # a ini for cdec +output=- # a weights file or stdout +decoder_config=test/example/cdec.ini # ini for cdec # these will be printed on each iteration print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough tmp=/tmp -stop_after=20 +stop_after=10 # 
stop iteration after 10 inputs # interesting stuff -epochs=1 -k=100 -N=4 -learning_rate=0.0001 -gamma=0.00001 -scorer=stupid_bleu -sample_from=kbest -filter=uniq -pair_sampling=108010 -pair_threshold=0.01 -select_weights=last +epochs=3 # run over input 3 times +k=200 # use 200best lists +N=4 # optimize (approx) BLEU4 +learning_rate=0.0001 # learning rate +gamma=0.00001 # use SVM reg +scorer=stupid_bleu # use stupid BLEU+1 approx. +sample_from=kbest # use kbest lists (as opposed to forest) +filter=uniq # only unique entries in kbest +pair_sampling=108010 # 10 vs 80 vs 10 and 80 vs 10 +pair_threshold=0 # minimum distance in BLEU +select_weights=last # just output last weights -- cgit v1.2.3