diff options
author | Patrick Simianer <p@simianer.de> | 2011-09-26 18:24:58 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-09-26 18:24:58 +0200 |
commit | a433961e78a958d25a6d708bfd89e9655d1217c7 (patch) | |
tree | a2df654e7cd7982e98500dac35ace278c4b93c4d /dtrain/dtrain.cc | |
parent | 899a30eb4e53d539ee0b846f38d7524fec811864 (diff) |
score refactoring #1
Diffstat (limited to 'dtrain/dtrain.cc')
-rw-r--r-- | dtrain/dtrain.cc | 41 |
1 files changed, 11 insertions, 30 deletions
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 44090242..35e6cc46 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -106,7 +106,7 @@ main(int argc, char** argv)
  // scoring metric/scorer
  string scorer_str = cfg["scorer"].as<string>();
- score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>);
+ /*score_t (*scorer)(NgramCounts&, const unsigned, const unsigned, unsigned, vector<score_t>);
  if (scorer_str == "bleu") {
  scorer = &bleu;
  } else if (scorer_str == "stupid_bleu") {
@@ -122,9 +122,11 @@ main(int argc, char** argv)
  NgramCounts global_counts(N); // counts for 1 best translations
  unsigned global_hyp_len = 0; // sum hypothesis lengths
  unsigned global_ref_len = 0; // sum reference lengths
- // ^^^ global_* for approx_bleu
+ // ^^^ global_* for approx_bleu*/
  vector<score_t> bleu_weights; // we leave this empty -> 1/N
- if (!quiet) cerr << setw(26) << "scorer '" << scorer_str << "'" << endl << endl;
+ //if (!quiet) cerr << setw(26) << "scorer '" << scorer_str << "'" << endl << endl;
+ StupidBleuScorer scorer;
+ scorer.Init(N, bleu_weights);
  // init weights
  Weights weights;
@@ -240,7 +242,6 @@ main(int argc, char** argv)
  // handling input
  strsplit(in, in_split, '\t', 4);
  // getting reference
- ref_ids.clear();
  vector<string> ref_tok;
  strsplit(in_split[2], ref_tok, ' ');
  register_and_convert(ref_tok, ref_ids);
@@ -279,43 +280,23 @@ main(int argc, char** argv)
  // (local) scoring
  if (t > 0) ref_ids = ref_ids_buf[ii];
- score_t score = 0.;
  for (unsigned i = 0; i < samples->size(); i++) {
- NgramCounts counts = make_ngram_counts(ref_ids, (*samples)[i].w, N);
- if (scorer_str == "approx_bleu") {
- unsigned hyp_len = 0;
- if (i == 0) { // 'context of 1best translations'
- global_counts += counts;
- global_hyp_len += (*samples)[i].w.size();
- global_ref_len += ref_ids.size();
- counts.reset();
- } else {
- hyp_len = (*samples)[i].w.size();
- }
- NgramCounts _c = global_counts + counts;
- score = .9 * scorer(_c,
- global_ref_len,
- global_hyp_len + hyp_len, N, bleu_weights);
- } else {
- score = scorer(counts,
- ref_ids.size(),
- (*samples)[i].w.size(), N, bleu_weights);
- }
-
- (*samples)[i].score = (score);
+ //cout << ii << " " << i << endl;
+ cout << _p9;
+ (*samples)[i].score = scorer.Score((*samples)[i], ref_ids, ii);
  if (i == 0) {
- score_sum += score;
+ score_sum += (*samples)[i].score;
  model_sum += (*samples)[i].model;
  }
  if (verbose) {
  if (i == 0) cerr << "'" << TD::GetString(ref_ids) << "' [ref]" << endl;
  cerr << _p5 << _np << "[hyp " << i << "] " << "'" << TD::GetString((*samples)[i].w) << "'";
- cerr << " [SCORE=" << score << ",model="<< (*samples)[i].model << "]" << endl;
+ cerr << " [SCORE=" << (*samples)[i].score << ",model="<< (*samples)[i].model << "]" << endl;
  cerr << (*samples)[i].f << endl;
  }
- } // sample/scoring loop
+ }
  if (verbose) cerr << endl;