diff options
author | Patrick Simianer <p@simianer.de> | 2014-06-12 13:56:42 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-06-12 13:56:42 +0200 |
commit | 244971287003d079e46193b8a209c28955f90134 (patch) | |
tree | 8beaae6b12b913acb213fc7f2415fd63886192f9 /training/mira | |
parent | 5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (diff) | |
parent | b66e838ed52decc0be1eb5817b2a77c3840db2c5 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'training/mira')
-rw-r--r-- | training/mira/kbest_cut_mira.cc | 61 | ||||
-rwxr-xr-x | training/mira/mira.py | 11 |
2 files changed, 32 insertions, 40 deletions
diff --git a/training/mira/kbest_cut_mira.cc b/training/mira/kbest_cut_mira.cc index cde65332..724b1853 100644 --- a/training/mira/kbest_cut_mira.cc +++ b/training/mira/kbest_cut_mira.cc @@ -341,23 +341,22 @@ struct BasicObserver: public DecoderObserver { }; struct TrainingObserver : public DecoderObserver { - TrainingObserver(const int k, const DocScorer& d, vector<GoodBadOracle>* o, vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { - - - if(!pseudo_doc && !sent_approx) - if(cur_pass > 0) //calculate corpus bleu score from previous iterations 1-best for BLEU gain - { - ScoreP acc; - for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { - if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); } - acc->PlusEquals(*corpus_bleu_sent_stats[ii]); - - } - corpus_bleu_stats = acc; - corpus_bleu_score = acc->ComputeScore(); + TrainingObserver(const int k, + const DocScorer& d, + vector<GoodBadOracle>* o, + vector<ScoreP>* cbs) : ds(d), oracles(*o), corpus_bleu_sent_stats(*cbs), kbest_size(k) { + if(!pseudo_doc && !sent_approx) { + if(cur_pass > 0) { //calculate corpus bleu score from previous iterations 1-best for BLEU gain + ScoreP acc; + for (int ii = 0; ii < corpus_bleu_sent_stats.size(); ii++) { + if (!acc) { acc = corpus_bleu_sent_stats[ii]->GetZero(); } + acc->PlusEquals(*corpus_bleu_sent_stats[ii]); + } + corpus_bleu_stats = acc; + corpus_bleu_score = acc->ComputeScore(); } - -} + } + } const DocScorer& ds; vector<ScoreP>& corpus_bleu_sent_stats; vector<GoodBadOracle>& oracles; @@ -461,7 +460,6 @@ struct TrainingObserver : public DecoderObserver { } else //use sentence-level smoothing ( used when cur_pass=0 if not pseudo_doc) { - sentscore = mt_metric_scale * (ds[sent_id]->ScoreCandidate(d->yield)->ComputeScore()); } @@ -575,19 +573,15 @@ void ReadTrainingCorpus(const string& fname, vector<string>* c) { } } -void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od) -{ - cerr << "Reading BLEU gain file "; +void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScorer& ds, const string& od) { + cerr << "Reading previous score file "; string fname; - if(cur_pass == 0) - { - fname = od + "/run.raw.init"; - } - else - { - int last_pass = cur_pass - 1; - fname = od + "/run.raw." + boost::lexical_cast<std::string>(last_pass) + ".B"; - } + if (cur_pass == 0) { + fname = od + "/run.raw.init"; + } else { + int last_pass = cur_pass - 1; + fname = od + "/run.raw." + boost::lexical_cast<std::string>(last_pass) + ".B"; + } cerr << fname << "\n"; ReadFile rf(fname); istream& in = *rf.stream(); @@ -604,7 +598,6 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore if (!acc) { acc = sentscore->GetZero(); } acc->PlusEquals(*sentscore); ++lc; - } assert(lc > 0); @@ -612,7 +605,6 @@ void ReadPastTranslationForScore(const int cur_pass, vector<ScoreP>* c, DocScore string details; acc->ScoreDetails(&details); cerr << "Previous run: " << details << score << endl; - } @@ -672,10 +664,9 @@ int main(int argc, char** argv) { //check training pass,if >0, then use previous iterations corpus bleu stats cur_pass = stream ? 0 : conf["pass"].as<int>(); - if(cur_pass > 0) - { - ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, *ds, output_dir); - } + if(cur_pass > 0) { + ReadPastTranslationForScore(cur_pass, &corpus_bleu_sent_stats, *ds, output_dir); + } cerr << "Using optimizer:" << optimizer << endl; diff --git a/training/mira/mira.py b/training/mira/mira.py index 539a0b0e..691a62a6 100755 --- a/training/mira/mira.py +++ b/training/mira/mira.py @@ -203,14 +203,15 @@ def main(): if have_mpl: graph_file = graph(args.output_dir, hope_best_fear, args.metric) dev_results, dev_bleu = evaluate(args.devset, args.weights, args.config, - script_dir, args.output_dir) + script_dir, args.output_dir, args.jobs) if args.test: if args.test_config: test_results, test_bleu = evaluate(args.test, args.weights, - args.test_config, script_dir, args.output_dir) + args.test_config, script_dir, args.output_dir, + args.jobs) else: test_results, test_bleu = evaluate(args.test, args.weights, args.config, - script_dir, args.output_dir) + script_dir, args.output_dir, args.jobs) else: test_results = '' test_bleu = '' @@ -240,11 +241,11 @@ def graph(output_dir, hope_best_fear, metric): return graph_file #evaluate a given test set using decode-and-evaluate.pl -def evaluate(testset, weights, ini, script_dir, out_dir): +def evaluate(testset, weights, ini, script_dir, out_dir, jobs): evaluator = '{}/../utils/decode-and-evaluate.pl'.format(script_dir) try: p = subprocess.Popen([evaluator, '-c', ini, '-w', weights, '-i', testset, - '-d', out_dir, '--jobs', args.jobs], stdout=subprocess.PIPE) + '-d', out_dir, '--jobs', str(jobs)], stdout=subprocess.PIPE) results, err = p.communicate() bleu, results = results.split('\n',1) except subprocess.CalledProcessError: |