From f9b3b81d7b5a817f39238c1d1fcbaa981d729f4a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 4 Sep 2011 23:40:44 +0200 Subject: minor updates, fixes, kbest filtering switch --- dtrain/test/blunsom08.dtrain.ini | 9 +++++++++ dtrain/test/cdec.ini | 6 ------ dtrain/test/cdec.lm.ini | 7 +++++++ dtrain/test/nc-wmt11/cdec.ini | 7 +++++++ dtrain/test/nc-wmt11/dtrain.ini | 10 ++++++++++ dtrain/test/test.in | 4 ++-- dtrain/test/toy.dtrain.ini | 2 +- dtrain/test/toy.in | 4 ++-- 8 files changed, 38 insertions(+), 11 deletions(-) create mode 100644 dtrain/test/blunsom08.dtrain.ini create mode 100644 dtrain/test/cdec.lm.ini create mode 100644 dtrain/test/nc-wmt11/cdec.ini create mode 100644 dtrain/test/nc-wmt11/dtrain.ini (limited to 'dtrain/test') diff --git a/dtrain/test/blunsom08.dtrain.ini b/dtrain/test/blunsom08.dtrain.ini new file mode 100644 index 00000000..262398fe --- /dev/null +++ b/dtrain/test/blunsom08.dtrain.ini @@ -0,0 +1,9 @@ +decoder_config=test/cdec.lm.ini +kbest=100 +ngrams=3 +epochs=8 +input=data/blunsom08.in. # NEXT__RULE ... +scorer=approx_bleu +output=data/w/blunsom08.weights.gz +stop_after=100 + diff --git a/dtrain/test/cdec.ini b/dtrain/test/cdec.ini index f2695c5d..0ee371c8 100644 --- a/dtrain/test/cdec.ini +++ b/dtrain/test/cdec.ini @@ -1,9 +1,3 @@ formalism=scfg add_pass_through_rules=true -#feature_function=WordPenalty -#feature_function=KLanguageModel data/demo/en.3gram.pruned.klm -#feature_function=KLanguageModel data/europarl-v6.tok.lc.s-tag.en.arpa.kenlm.v4.mma -cubepruning_pop_limit=30 -scfg_max_span_limit=15 -#k_best=100 diff --git a/dtrain/test/cdec.lm.ini b/dtrain/test/cdec.lm.ini new file mode 100644 index 00000000..56bbb32f --- /dev/null +++ b/dtrain/test/cdec.lm.ini @@ -0,0 +1,7 @@ +formalism=scfg +add_pass_through_rules=true +feature_function=KLanguageModel data/en.3gram.pruned.klm +feature_function=WordPenalty +cubepruning_pop_limit=30 +scfg_max_span_limit=15 + diff --git a/dtrain/test/nc-wmt11/cdec.ini b/dtrain/test/nc-wmt11/cdec.ini new file mode 100644 index 00000000..fb2cbf23 --- /dev/null +++ b/dtrain/test/nc-wmt11/cdec.ini @@ -0,0 +1,7 @@ +formalism=scfg +add_pass_through_rules=true +feature_function=KLanguageModel data/nc-wmt11.en.srilm.gz +feature_function=WordPenalty +cubepruning_pop_limit=30 +scfg_max_span_limit=15 + diff --git a/dtrain/test/nc-wmt11/dtrain.ini b/dtrain/test/nc-wmt11/dtrain.ini new file mode 100644 index 00000000..51033f2d --- /dev/null +++ b/dtrain/test/nc-wmt11/dtrain.ini @@ -0,0 +1,10 @@ +decoder_config=test/nc-wmt11/cdec.ini +kbest=100 +ngrams=3 +epochs=8 +input=data/nc-wmt11.loo.localf.p0.500.rule-id #nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain.localf.p0 +scorer=approx_bleu +output=data/w/nc-wmt11.loo.p0.weights.gz +#stop_after=100 +wprint=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 + diff --git a/dtrain/test/test.in b/dtrain/test/test.in index 294d009b..662c8e3a 100644 --- a/dtrain/test/test.in +++ b/dtrain/test/test.in @@ -1,2 +1,2 @@ -vorrichtung means [X] ||| vorrichtung ||| apparatus ||| LogP=-200 ||| 0-0 __NEXT_RULE__ [X] ||| vorrichtung ||| means ||| LogP=-101 ||| 0-0 -eintest test [X] ||| eintest ||| test ||| LogP=-200 ||| 0-0 __NEXT_RULE__ [X] ||| eintest ||| xxx ||| LogP=-101 ||| 0-0 +0 vorrichtung means [X] ||| vorrichtung ||| apparatus ||| LogP=-200 ||| 0-0 __NEXT_RULE__ [X] ||| vorrichtung ||| means ||| LogP=-101 ||| 0-0 +1 eintest test [X] ||| eintest ||| test ||| LogP=-200 ||| 0-0 __NEXT_RULE__ [X] ||| eintest ||| xxx ||| LogP=-101 ||| 0-0 diff --git a/dtrain/test/toy.dtrain.ini b/dtrain/test/toy.dtrain.ini index 11cb7b1b..cacb3a2c 100644 --- a/dtrain/test/toy.dtrain.ini +++ b/dtrain/test/toy.dtrain.ini @@ -1,4 +1,4 @@ -decoder_config=data/cdec.ini +decoder_config=test/cdec.ini kbest=4 ngrams=1 epochs=3 diff --git a/dtrain/test/toy.in b/dtrain/test/toy.in index e49629b0..63f97158 100644 --- a/dtrain/test/toy.in +++ b/dtrain/test/toy.in @@ -1,2 +1,2 @@ -ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 -ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 +0 ich sah ein kleines haus i saw a little house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 +1 ich fand ein grosses haus i found a large house [S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 -- cgit v1.2.3