From a433961e78a958d25a6d708bfd89e9655d1217c7 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Mon, 26 Sep 2011 18:24:58 +0200
Subject: score refactoring #1

---
 dtrain/dtrain.cc | 41 +++++++++++------------------------------
 1 file changed, 11 insertions(+), 30 deletions(-)

diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 44090242..35e6cc46 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -106,7 +106,7 @@ main(int argc, char** argv)
 
   // scoring metric/scorer
   string scorer_str = cfg["scorer"].as<string>();
-  score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>);
+  /*score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>);
   if (scorer_str == "bleu") {
     scorer = &bleu;
   } else if (scorer_str == "stupid_bleu") {
@@ -122,9 +122,11 @@ main(int argc, char** argv)
   NgramCounts global_counts(N); // counts for 1 best translations
   unsigned global_hyp_len = 0; // sum hypothesis lengths
   unsigned global_ref_len = 0; // sum reference lengths
-  // ^^^ global_* for approx_bleu
+  // ^^^ global_* for approx_bleu*/
   vector<score_t> bleu_weights; // we leave this empty -> 1/N
-  if (!quiet) cerr << setw(26) << "scorer '" << scorer_str << "'" << endl << endl;
+  //if (!quiet) cerr << setw(26) << "scorer '" << scorer_str << "'" << endl << endl;
+  StupidBleuScorer scorer;
+  scorer.Init(N, bleu_weights);
 
   // init weights
   Weights weights;
@@ -240,7 +242,6 @@ main(int argc, char** argv)
       // handling input
       strsplit(in, in_split, '\t', 4);
       // getting reference
-      ref_ids.clear();
       vector<string> ref_tok;
       strsplit(in_split[2], ref_tok, ' ');
       register_and_convert(ref_tok, ref_ids);
@@ -279,43 +280,23 @@ main(int argc, char** argv)
 
     // (local) scoring
     if (t > 0) ref_ids = ref_ids_buf[ii];
-    score_t score = 0.;
     for (unsigned i = 0; i < samples->size(); i++) {
-      NgramCounts counts = make_ngram_counts(ref_ids, (*samples)[i].w, N);
-      if (scorer_str == "approx_bleu") {
-        unsigned hyp_len = 0;
-        if (i == 0) { // 'context of 1best translations'
-          global_counts += counts;
-          global_hyp_len += (*samples)[i].w.size();
-          global_ref_len += ref_ids.size();
-          counts.reset();
-        } else {
-          hyp_len = (*samples)[i].w.size();
-        }
-        NgramCounts _c = global_counts + counts;
-        score = .9 * scorer(_c,
-                            global_ref_len,
-                            global_hyp_len + hyp_len, N, bleu_weights);
-      } else {
-        score = scorer(counts,
-                       ref_ids.size(),
-                       (*samples)[i].w.size(), N, bleu_weights);
-      }
-
-      (*samples)[i].score = (score);
+      //cout << ii << " " << i << endl;
+      cout << _p9;
+      (*samples)[i].score = scorer.Score((*samples)[i], ref_ids, ii);
 
       if (i == 0) {
-        score_sum += score;
+        score_sum += (*samples)[i].score;
         model_sum += (*samples)[i].model;
       }
 
       if (verbose) {
         if (i == 0) cerr << "'" << TD::GetString(ref_ids) << "' [ref]" << endl;
         cerr << _p5 << _np << "[hyp " << i << "] " << "'" << TD::GetString((*samples)[i].w) << "'";
-        cerr << " [SCORE=" << score << ",model="<< (*samples)[i].model << "]" << endl;
+        cerr << " [SCORE=" << (*samples)[i].score << ",model="<< (*samples)[i].model << "]" << endl;
         cerr << (*samples)[i].f << endl;
       }
-    } // sample/scoring loop
+    }
 
     if (verbose) cerr << endl;
 
--
cgit v1.2.3
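
Note: the StupidBleuScorer used above is defined in dtrain's scoring code, which this patch does not touch, so its interface is only implied by the call sites (Init(N, bleu_weights) and Score(hyp, ref_ids, ii)). Below is a minimal, self-contained sketch of what such a scorer could look like under that assumption, using add-one-smoothed ("stupid") BLEU; ScoredHyp, WordID and score_t are stand-ins for dtrain's own types, not the real definitions.

// Hypothetical sketch only: names and signatures are inferred from the call
// sites in the patch above, not taken from dtrain's actual score.h.
#include <algorithm>
#include <cmath>
#include <map>
#include <vector>

typedef int    WordID;   // stand-in for dtrain's word id type
typedef double score_t;  // stand-in for dtrain's score type

struct ScoredHyp {         // assumed shape, matching (*samples)[i].w/.score/.model usage
  std::vector<WordID> w;   // hypothesis tokens
  score_t score;           // filled in by the scorer
  score_t model;           // model score (not used here)
};

struct StupidBleuScorer {
  unsigned N_;
  std::vector<score_t> w_; // n-gram weights; empty -> uniform 1/N

  void Init(unsigned N, const std::vector<score_t>& bleu_weights) {
    N_ = N;
    w_ = bleu_weights;
    if (w_.empty()) w_.assign(N_, 1.0 / N_);
  }

  // count all n-grams of order 1..N in a token sequence
  static std::map<std::vector<WordID>, unsigned>
  CountNgrams(const std::vector<WordID>& s, unsigned N) {
    std::map<std::vector<WordID>, unsigned> c;
    for (unsigned n = 1; n <= N; n++)
      for (unsigned i = 0; i + n <= s.size(); i++)
        c[std::vector<WordID>(s.begin() + i, s.begin() + i + n)]++;
    return c;
  }

  // 'ii' (the sentence index) is accepted only to mirror the call in dtrain.cc;
  // a purely sentence-local metric like stupid BLEU does not need it.
  score_t Score(ScoredHyp& hyp, const std::vector<WordID>& ref, unsigned /*ii*/) {
    if (hyp.w.empty() || ref.empty()) return 0.;
    std::map<std::vector<WordID>, unsigned> hc = CountNgrams(hyp.w, N_),
                                            rc = CountNgrams(ref, N_);
    std::vector<score_t> match(N_, 0.), total(N_, 0.);
    for (std::map<std::vector<WordID>, unsigned>::const_iterator it = hc.begin();
         it != hc.end(); ++it) {
      unsigned n = it->first.size() - 1;
      total[n] += it->second;
      std::map<std::vector<WordID>, unsigned>::const_iterator r = rc.find(it->first);
      if (r != rc.end()) match[n] += std::min(it->second, r->second); // clipped count
    }
    score_t sum = 0.;
    for (unsigned n = 0; n < N_; n++) {
      score_t add = (n == 0) ? 0. : 1.;     // add-one smoothing for n > 1 ("stupid" BLEU)
      if (match[n] + add == 0.) return 0.;  // no unigram match -> score is 0
      sum += w_[n] * log((match[n] + add) / (total[n] + add));
    }
    score_t bp = (hyp.w.size() < ref.size()) // brevity penalty
        ? exp(1.0 - (score_t)ref.size() / hyp.w.size()) : 1.0;
    return bp * exp(sum);
  }
};

Since stupid BLEU is sentence-local, the index ii is unused in this sketch; presumably it is part of the interface so that approximate-BLEU-style scorers, which need the running 1-best context that this patch removes from main(), can share the same Score() signature.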