From 0f10af77140704a969073e0718c2eb3ba1f99ead Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 2 Apr 2015 13:58:20 +0200 Subject: learning --- model/cdec.ini | 28 ++++++++++++++++++++++++++++ model/dtrain.ini | 4 ++++ model/grammar.gz | Bin 0 -> 121 bytes model/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes model/run | 4 ++++ model/src.gz | Bin 0 -> 70 bytes model/weights.init | 12 ++++++++++++ 7 files changed, 48 insertions(+) create mode 100644 model/cdec.ini create mode 100644 model/dtrain.ini create mode 100644 model/grammar.gz create mode 100644 model/nc-wmt11.en.srilm.gz create mode 100755 model/run create mode 100644 model/src.gz create mode 100644 model/weights.init (limited to 'model') diff --git a/model/cdec.ini b/model/cdec.ini new file mode 100644 index 0000000..36f53ac --- /dev/null +++ b/model/cdec.ini @@ -0,0 +1,28 @@ +formalism=scfg +add_pass_through_rules=true +scfg_max_span_limit=15 +intersection_strategy=cube_pruning +cubepruning_pop_limit=200 +grammar=grammar.gz +feature_function=WordPenalty +feature_function=KLanguageModel nc-wmt11.en.srilm.gz +# all currently working feature functions for translation: +# (with those features active that were used in the ACL paper) +#feature_function=ArityPenalty +#feature_function=CMR2008ReorderingFeatures +#feature_function=Dwarf +#feature_function=InputIndicator +#feature_function=LexNullJump +#feature_function=NewJump +#feature_function=NgramFeatures +#feature_function=NonLatinCount +#feature_function=OutputIndicator +feature_function=RuleIdentityFeatures +feature_function=RuleSourceBigramFeatures +feature_function=RuleTargetBigramFeatures +feature_function=RuleShape +#feature_function=LexicalFeatures 1 1 1 +#feature_function=SourceSpanSizeFeatures +#feature_function=SourceWordPenalty +#feature_function=SpanFeatures +weights=weights.init diff --git a/model/dtrain.ini b/model/dtrain.ini new file mode 100644 index 0000000..b6d29bb --- /dev/null +++ b/model/dtrain.ini @@ -0,0 +1,4 @@ +decoder_conf=./cdec.ini # config for cdec +k=100 # use 100best lists +N=4 # optimize (approx.) BLEU4 +margin=0.0 # perceptron's margin diff --git a/model/grammar.gz b/model/grammar.gz new file mode 100644 index 0000000..84eb73c Binary files /dev/null and b/model/grammar.gz differ diff --git a/model/nc-wmt11.en.srilm.gz b/model/nc-wmt11.en.srilm.gz new file mode 100644 index 0000000..7ce8105 Binary files /dev/null and b/model/nc-wmt11.en.srilm.gz differ diff --git a/model/run b/model/run new file mode 100755 index 0000000..43f20b0 --- /dev/null +++ b/model/run @@ -0,0 +1,4 @@ +#!/bin/bash -x + +~/src/cdec_net/training/dtrain/dtrain_net_interface -c dtrain.ini -a ipc:///tmp/dtrain.ipc + diff --git a/model/src.gz b/model/src.gz new file mode 100644 index 0000000..9aa247f Binary files /dev/null and b/model/src.gz differ diff --git a/model/weights.init b/model/weights.init new file mode 100644 index 0000000..0d09f9f --- /dev/null +++ b/model/weights.init @@ -0,0 +1,12 @@ +CountEF 0.1 +EgivenFCoherent -0.1 +Glue 0.01 +IsSingletonF -0.01 +IsSingletonFE -0.01 +LanguageModel 0.1 +LanguageModel_OOV -1 +MaxLexFgivenE -0.1 +MaxLexEgivenF -0.1 +PassThrough -0.1 +SampleCountF -0.1 +WordPenalty -0.1 -- cgit v1.2.3