diff options
Diffstat (limited to 'dtrain')
| -rw-r--r-- | dtrain/Makefile.am | 2 | ||||
| -rw-r--r-- | dtrain/dtrain.cc | 12 | ||||
| -rw-r--r-- | dtrain/dtrain.h | 14 | ||||
| -rw-r--r-- | dtrain/pairsampling.h | 12 | ||||
| -rw-r--r-- | dtrain/test/example/README | 4 | ||||
| -rw-r--r-- | dtrain/test/example/dtrain.ini | 2 | ||||
| -rw-r--r-- | dtrain/test/example/expected-output | 124 | 
7 files changed, 159 insertions, 11 deletions
| diff --git a/dtrain/Makefile.am b/dtrain/Makefile.am index f39d161e..64fef489 100644 --- a/dtrain/Makefile.am +++ b/dtrain/Makefile.am @@ -3,5 +3,5 @@ bin_PROGRAMS = dtrain  dtrain_SOURCES = dtrain.cc score.cc  dtrain_LDADD   = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -AM_CPPFLAGS = -O3 -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc index 8b1fc953..864eb153 100644 --- a/dtrain/dtrain.cc +++ b/dtrain/dtrain.cc @@ -424,12 +424,18 @@ main(int argc, char** argv)        for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();             it != pairs.end(); it++) { +#ifdef DTRAIN_FASTER_PERCEPTRON +        bool rank_error = true; // pair filtering already did this for us +        rank_errors++; +        score_t margin = 2.; // compiler, could you get rid of the margin? +#else          bool rank_error = it->first.model <= it->second.model;          if (rank_error) rank_errors++;          score_t margin = fabs(it->first.model - it->second.model); -        if (!rank_error && margin < 1) margin_violations++; +        if (!rank_error && margin < 1.) margin_violations++; +#endif          if (scale_bleu_diff) eta = it->first.score - it->second.score; -        if (rank_error || (gamma && margin<1)) { +        if (rank_error || (gamma && margin<1.)) {            SparseVector<weight_t> diff_vec = it->first.f - it->second.f;            lambdas.plus_eq_v_times_s(diff_vec, eta);            if (gamma) @@ -534,8 +540,10 @@ main(int argc, char** argv)      cerr << _np << npairs/(float)in_sz << endl;      cerr << "        avg # rank err: ";      cerr << rank_errors/(float)in_sz << endl; +#ifndef DTRAIN_FASTER_PERCEPTRON      cerr << "     avg # margin viol: ";      cerr << margin_violations/(float)in_sz << endl; +#endif      cerr << "    non0 feature count: " <<  nonz << endl;      cerr << "           avg list sz: " << list_sz/(float)in_sz << endl;      cerr << "           avg f count: " << f_count/(float)list_sz << endl; diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 94d149ce..d8dc14b6 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -1,6 +1,14 @@  #ifndef _DTRAIN_H_  #define _DTRAIN_H_ +#undef DTRAIN_FASTER_PERCEPTRON // only look at misranked pairs +                                 // DO NOT USE WITH SVM! +#undef DTRAIN_LOCAL +#define DTRAIN_DOTS 10 // after how many inputs to display a '.' +#define DTRAIN_GRAMMAR_DELIM "########EOS########" +#define DTRAIN_SCALE 100000 + +  #include <iomanip>  #include <climits>  #include <string.h> @@ -13,11 +21,7 @@  #include "filelib.h" -#undef DTRAIN_LOCAL -#define DTRAIN_DOTS 10 // after how many inputs to display a '.' -#define DTRAIN_GRAMMAR_DELIM "########EOS########" -#define DTRAIN_SCALE 100000  using namespace std;  using namespace dtrain; @@ -32,7 +36,7 @@ inline void register_and_convert(const vector<string>& strs, vector<WordID>& ids  inline string gettmpf(const string path, const string infix)  { -  char fn[1024]; +  char fn[path.size() + infix.size() + 8];    strcpy(fn, path.c_str());    strcat(fn, "/");    strcat(fn, infix.c_str()); diff --git a/dtrain/pairsampling.h b/dtrain/pairsampling.h index bac132c6..5085738e 100644 --- a/dtrain/pairsampling.h +++ b/dtrain/pairsampling.h @@ -51,6 +51,9 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor    unsigned sep = round(sz*hi_lo);    for (unsigned i = 0; i < sep; i++) {      for (unsigned j = sep; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON +      if ((*s)[i].model <= (*s)[j].model) { +#endif        if (threshold > 0) {          if (accept_pair((*s)[i].score, (*s)[j].score, threshold))            training.push_back(make_pair((*s)[i], (*s)[j])); @@ -58,10 +61,16 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor          if ((*s)[i].score != (*s)[j].score)            training.push_back(make_pair((*s)[i], (*s)[j]));        } +#ifdef DTRAIN_FASTER_PERCEPTRON +      } +#endif      }    }    for (unsigned i = sep; i < sz-sep; i++) {      for (unsigned j = sz-sep; j < sz; j++) { +#ifdef DTRAIN_FASTER_PERCEPTRON +      if ((*s)[i].model <= (*s)[j].model) { +#endif        if (threshold > 0) {          if (accept_pair((*s)[i].score, (*s)[j].score, threshold))            training.push_back(make_pair((*s)[i], (*s)[j])); @@ -69,6 +78,9 @@ partXYX(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, scor          if ((*s)[i].score != (*s)[j].score)            training.push_back(make_pair((*s)[i], (*s)[j]));        } +#ifdef DTRAIN_FASTER_PERCEPTRON +      } +#endif      }    }  } diff --git a/dtrain/test/example/README b/dtrain/test/example/README index b3ea5f06..6937b11b 100644 --- a/dtrain/test/example/README +++ b/dtrain/test/example/README @@ -1,8 +1,8 @@  Small example of input format for distributed training.  Call dtrain from cdec/dtrain/ with ./dtrain -c test/example/dtrain.ini . -For this to work, disable '#define DTRAIN_LOCAL' from dtrain.h +For this to work, undef 'DTRAIN_LOCAL' in dtrain.h  and recompile. -Data is here: http://simianer.de/dtrain +Data is here: http://simianer.de/#dtrain diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini index f87ee9cf..e43d6b34 100644 --- a/dtrain/test/example/dtrain.ini +++ b/dtrain/test/example/dtrain.ini @@ -5,7 +5,7 @@ decoder_config=test/example/cdec.ini # config for cdec  # weights for these features will be printed on each iteration  print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PhraseModel_5 PhraseModel_6 PassThrough  tmp=/tmp -stop_after=10 # stop epoch after 20 inputs +stop_after=20 # stop epoch after 20 inputs  # interesting stuff  epochs=3                # run over input 3 times diff --git a/dtrain/test/example/expected-output b/dtrain/test/example/expected-output new file mode 100644 index 00000000..08733dd4 --- /dev/null +++ b/dtrain/test/example/expected-output @@ -0,0 +1,124 @@ +                cdec cfg 'test/example/cdec.ini' +feature: WordPenalty (no config parameters) +State is 0 bytes for feature WordPenalty +feature: KLanguageModel (with config parameters 'test/example/nc-wmt11.en.srilm.gz') +Loading the LM will be faster if you build a binary file. +Reading test/example/nc-wmt11.en.srilm.gz +----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100 +**************************************************************************************************** +Loaded 5-gram KLM from test/example/nc-wmt11.en.srilm.gz (MapSize=49581) +State is 98 bytes for feature KLanguageModel test/example/nc-wmt11.en.srilm.gz +feature: RuleIdentityFeatures (no config parameters) +State is 0 bytes for feature RuleIdentityFeatures +feature: RuleNgramFeatures (no config parameters) +State is 0 bytes for feature RuleNgramFeatures +feature: RuleShape (no config parameters) +  Example feature: Shape_S00000_T00000 +State is 0 bytes for feature RuleShape +Seeding random number sequence to 380245307 + +dtrain +Parameters: +                       k 100 +                       N 4 +                       T 3 +                 scorer 'stupid_bleu' +             sample from 'kbest' +                  filter 'uniq' +           learning rate 0.0001 +                   gamma 0 +                   pairs 'XYX' +                   hi lo 0.1 +          pair threshold 0 +          select weights 'VOID' +                  l1 reg 0 'none' +                cdec cfg 'test/example/cdec.ini' +                   input 'test/example/nc-wmt11.1k.gz' +                  output '-' +              stop_after 20 +(a dot represents 10 inputs) +Iteration #1 of 3. + .. 20 +Stopping after 20 input sentences. +WEIGHTS +              Glue = -0.1015 +       WordPenalty = -0.0152 +     LanguageModel = +0.21493 + LanguageModel_OOV = -0.3257 +     PhraseModel_0 = -0.050844 +     PhraseModel_1 = +0.25074 +     PhraseModel_2 = +0.27944 +     PhraseModel_3 = -0.038384 +     PhraseModel_4 = -0.12041 +     PhraseModel_5 = +0.1047 +     PhraseModel_6 = -0.1289 +       PassThrough = -0.3094 +        --- +       1best avg score: 0.17508 (+0.17508) + 1best avg model score: -1.2392 (-1.2392) +           avg # pairs: 1329.8 +        avg # rank err: 649.1 +     avg # margin viol: 677.5 +    non0 feature count: 874 +           avg list sz: 88.6 +           avg f count: 85.643 +(time 0.25 min, 0.75 s/S) + +Iteration #2 of 3. + .. 20 +WEIGHTS +              Glue = -0.0792 +       WordPenalty = -0.056198 +     LanguageModel = +0.31038 + LanguageModel_OOV = -0.4011 +     PhraseModel_0 = +0.072188 +     PhraseModel_1 = +0.11473 +     PhraseModel_2 = +0.049774 +     PhraseModel_3 = -0.18448 +     PhraseModel_4 = -0.12092 +     PhraseModel_5 = +0.1599 +     PhraseModel_6 = -0.0606 +       PassThrough = -0.3848 +        --- +       1best avg score: 0.24015 (+0.065075) + 1best avg model score: -10.131 (-8.8914) +           avg # pairs: 1324.7 +        avg # rank err: 558.65 +     avg # margin viol: 752.85 +    non0 feature count: 1236 +           avg list sz: 84.9 +           avg f count: 88.306 +(time 0.22 min, 0.65 s/S) + +Iteration #3 of 3. + .. 20 +WEIGHTS +              Glue = -0.051 +       WordPenalty = -0.077956 +     LanguageModel = +0.33699 + LanguageModel_OOV = -0.4726 +     PhraseModel_0 = +0.040228 +     PhraseModel_1 = +0.18 +     PhraseModel_2 = +0.15618 +     PhraseModel_3 = -0.098908 +     PhraseModel_4 = -0.036555 +     PhraseModel_5 = +0.1619 +     PhraseModel_6 = +0.0078 +       PassThrough = -0.4563 +        --- +       1best avg score: 0.25527 (+0.015113) + 1best avg model score: -13.906 (-3.7756) +           avg # pairs: 1356.3 +        avg # rank err: 562.1 +     avg # margin viol: 757.35 +    non0 feature count: 1482 +           avg list sz: 86.65 +           avg f count: 87.475 +(time 0.23 min, 0.7 s/S) + +Writing weights file to '-' ... +done + +--- +Best iteration: 3 [SCORE 'stupid_bleu'=0.25527]. +This took 0.7 min. | 
