diff options
| author | Paul Baltescu <pauldb89@gmail.com> | 2013-04-24 17:18:10 +0100 | 
|---|---|---|
| committer | Paul Baltescu <pauldb89@gmail.com> | 2013-04-24 17:18:10 +0100 | 
| commit | ba206aaac1d95e76126443c9e7ccc5941e879849 (patch) | |
| tree | 13a918da3f3983fd8e4cb74e7cdc3f5e1fc01cd1 /training/dtrain/test/example | |
| parent | c2aede0f19b7a5e43581768b8c4fbfae8b92c68c (diff) | |
| parent | db960a8bba81df3217660ec5a96d73e0d6baa01b (diff) | |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'training/dtrain/test/example')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | training/dtrain/test/example/README | 8 |
| -rw-r--r-- | training/dtrain/test/example/cdec.ini | 25 |
| -rw-r--r-- | training/dtrain/test/example/dtrain.ini | 22 |
| -rw-r--r-- | training/dtrain/test/example/expected-output | 89 |
4 files changed, 0 insertions, 144 deletions
| diff --git a/training/dtrain/test/example/README b/training/dtrain/test/example/README deleted file mode 100644 index 6937b11b..00000000 --- a/training/dtrain/test/example/README +++ /dev/null @@ -1,8 +0,0 @@ -Small example of input format for distributed training. -Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . - -For this to work, undef 'DTRAIN_LOCAL' in dtrain.h -and recompile. - -Data is here: http://simianer.de/#dtrain - diff --git a/training/dtrain/test/example/cdec.ini b/training/dtrain/test/example/cdec.ini deleted file mode 100644 index d5955f0e..00000000 --- a/training/dtrain/test/example/cdec.ini +++ /dev/null @@ -1,25 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -scfg_max_span_limit=15 -intersection_strategy=cube_pruning -cubepruning_pop_limit=30 -feature_function=WordPenalty -feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz -# all currently working feature functions for translation: -# (with those features active that were used in the ACL paper) -#feature_function=ArityPenalty -#feature_function=CMR2008ReorderingFeatures -#feature_function=Dwarf -#feature_function=InputIndicator -#feature_function=LexNullJump -#feature_function=NewJump -#feature_function=NgramFeatures -#feature_function=NonLatinCount -#feature_function=OutputIndicator -feature_function=RuleIdentityFeatures -feature_function=RuleSourceBigramFeatures -feature_function=RuleTargetBigramFeatures -feature_function=RuleShape -#feature_function=SourceSpanSizeFeatures -#feature_function=SourceWordPenalty -#feature_function=SpanFeatures diff --git a/training/dtrain/test/example/dtrain.ini b/training/dtrain/test/example/dtrain.ini deleted file mode 100644 index 72d50ca1..00000000 --- a/training/dtrain/test/example/dtrain.ini +++ /dev/null @@ -1,22 +0,0 @@ -input=test/example/nc-wmt11.1k.gz    # use '-' for STDIN -output=-                             # a weights file (add .gz for gzip compression) or STDOUT '-' -select_weights=VOID               
   # don't output weights -decoder_config=test/example/cdec.ini # config for cdec -# weights for these features will be printed on each iteration -print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough -tmp=/tmp -stop_after=10 # stop epoch after 10 inputs - -# interesting stuff -epochs=2                # run over input 2 times -k=100                   # use 100best lists -N=4                     # optimize (approx) BLEU4 -scorer=stupid_bleu      # use 'stupid' BLEU+1 -learning_rate=1.0       # learning rate, don't care if gamma=0 (perceptron) -gamma=0                 # use SVM reg -sample_from=kbest       # use kbest lists (as opposed to forest) -filter=uniq             # only unique entries in kbest (surface form) -pair_sampling=XYX -hi_lo=0.1               # 10 vs 80 vs 10 and 80 vs 10 here -pair_threshold=0        # minimum distance in BLEU (this will still only use pairs with diff > 0) -loss_margin=0 diff --git a/training/dtrain/test/example/expected-output b/training/dtrain/test/example/expected-output deleted file mode 100644 index 05326763..00000000 --- a/training/dtrain/test/example/expected-output +++ /dev/null @@ -1,89 +0,0 @@ -                cdec cfg 'test/example/cdec.ini' -Loading the LM will be faster if you build a binary file. 
-Reading test/example/nc-wmt11.en.srilm.gz -----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 -**************************************************************************************************** -  Example feature: Shape_S00000_T00000 -Seeding random number sequence to 2912000813 - -dtrain -Parameters: -                       k 100 -                       N 4 -                       T 2 -                 scorer 'stupid_bleu' -             sample from 'kbest' -                  filter 'uniq' -           learning rate 1 -                   gamma 0 -             loss margin 0 -                   pairs 'XYX' -                   hi lo 0.1 -          pair threshold 0 -          select weights 'VOID' -                  l1 reg 0 'none' -               max pairs 4294967295 -                cdec cfg 'test/example/cdec.ini' -                   input 'test/example/nc-wmt11.1k.gz' -                  output '-' -              stop_after 10 -(a dot represents 10 inputs) -Iteration #1 of 2. - . 10 -Stopping after 10 input sentences. -WEIGHTS -              Glue = -637 -       WordPenalty = +1064 -     LanguageModel = +1175.3 - LanguageModel_OOV = -1437 -     PhraseModel_0 = +1935.6 -     PhraseModel_1 = +2499.3 -     PhraseModel_2 = +964.96 -     PhraseModel_3 = +1410.8 -     PhraseModel_4 = -5977.9 -     PhraseModel_5 = +522 -     PhraseModel_6 = +1089 -       PassThrough = -1308 -        --- -       1best avg score: 0.16963 (+0.16963) - 1best avg model score: 64485 (+64485) -           avg # pairs: 1494.4 -        avg # rank err: 702.6 -     avg # margin viol: 0 -    non0 feature count: 528 -           avg list sz: 85.7 -           avg f count: 102.75 -(time 0.083 min, 0.5 s/S) - -Iteration #2 of 2. - . 
10 -WEIGHTS -              Glue = -1196 -       WordPenalty = +809.52 -     LanguageModel = +3112.1 - LanguageModel_OOV = -1464 -     PhraseModel_0 = +3895.5 -     PhraseModel_1 = +4683.4 -     PhraseModel_2 = +1092.8 -     PhraseModel_3 = +1079.6 -     PhraseModel_4 = -6827.7 -     PhraseModel_5 = -888 -     PhraseModel_6 = +142 -       PassThrough = -1335 -        --- -       1best avg score: 0.277 (+0.10736) - 1best avg model score: -3110.5 (-67595) -           avg # pairs: 1144.2 -        avg # rank err: 529.1 -     avg # margin viol: 0 -    non0 feature count: 859 -           avg list sz: 74.9 -           avg f count: 112.84 -(time 0.067 min, 0.4 s/S) - -Writing weights file to '-' ... -done - ---- -Best iteration: 2 [SCORE 'stupid_bleu'=0.277]. -This took 0.15 min. | 
