From 190ad8ae1e131ac0e29ff975b0d6502f3cc57af6 Mon Sep 17 00:00:00 2001 From: graehl Date: Mon, 19 Jul 2010 23:40:30 +0000 Subject: fixed oracle generate, detailed score info with --verbose git-svn-id: https://ws10smt.googlecode.com/svn/trunk@329 ec762483-ff6d-05da-a07a-a48fb63a330f --- vest/aer_scorer.cc | 6 +++--- vest/mr_vest_generate_mapper_input.cc | 27 +++++++++++++++++++-------- vest/scorer.cc | 30 ++++++++++++++++++------------ vest/ter.cc | 10 +++++++--- 4 files changed, 47 insertions(+), 26 deletions(-) (limited to 'vest') diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc index 253076c5..81ffae76 100644 --- a/vest/aer_scorer.cc +++ b/vest/aer_scorer.cc @@ -18,9 +18,9 @@ class AERScore : public Score { virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} virtual void PlusEquals(const Score& delta, const float scale) { const AERScore& other = static_cast(delta); - num_matches += other.num_matches; - num_predicted += other.num_predicted; - num_in_ref += other.num_in_ref; + num_matches += scale*other.num_matches; + num_predicted += scale*other.num_predicted; + num_in_ref += scale*other.num_in_ref; } virtual void PlusEquals(const Score& delta) { const AERScore& other = static_cast(delta); diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index f66b5082..5ab5c689 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -183,20 +183,27 @@ struct oracle_directions { Sentences model_hyps; - vector model_scores; + vector model_scores; bool have_doc; void Init() { have_doc=!decoder_translations_file.empty(); if (have_doc) { model_hyps.Load(decoder_translations_file); + if (verbose) model_hyps.Print(cerr,5); model_scores.resize(model_hyps.size()); + if (dev_set_size!=model_hyps.size()) { + cerr<<"You supplied decoder_translations with a different number of lines ("<ScoreCandidate(model_hyps[i]); - if (verbose) cerr<<"Before model["<ScoreDetails()<PlusEquals(*model_scores[i]); - if (verbose) cerr<<"After model["<ScoreDetails()<PlusEquals(*model_scores[i]); + if (verbose) cerr<<"After model["<PlusEquals(*hopesc,-1); + cerr<<"Without hope: "<ScoreDetails()<* precs, float* bp) const; float ComputePartialScore(vector* prec, float* bp) const; valarray correct_ngram_hit_counts; @@ -475,10 +478,13 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface void BLEUScore::ScoreDetails(string* details) const { char buf[2000]; - vector precs(4); + vector precs(min(N(),4)); float bp; float bleu = ComputeScore(&precs, &bp); - sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + for (int i=N();i<4;++i) + precs[i]=0.; + char *bufn; + bufn=buf+sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", bleu*100.0, precs[0]*100.0, precs[1]*100.0, @@ -492,7 +498,7 @@ float BLEUScore::ComputeScore(vector* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; - for (int i = 0; i < hyp_ngram_counts.size(); ++i) { + for (int i = 0; i < N(); ++i) { if (hyp_ngram_counts[i] > 0) { float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); if (precs) precs->push_back(exp(lprec)); @@ -516,7 +522,7 @@ float BLEUScore::ComputePartialScore(vector* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; - for (int i = 0; i < hyp_ngram_counts.size(); ++i) { + for (int i = 0; i < N(); ++i) { // cerr << "In CPS " << hyp_ngram_counts[i] << " " << correct_ngram_hit_counts[i] << endl; if (hyp_ngram_counts[i] > 0) { float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); @@ -562,10 +568,10 @@ void BLEUScore::PlusEquals(const Score& delta) { void BLEUScore::PlusEquals(const Score& delta, const float scale) { const BLEUScore& d = static_cast(delta); - correct_ngram_hit_counts = (correct_ngram_hit_counts + d.correct_ngram_hit_counts) * scale; - hyp_ngram_counts = ( hyp_ngram_counts + d.hyp_ngram_counts) * scale; - ref_len = (ref_len + d.ref_len) * scale; - hyp_len = ( hyp_len + d.hyp_len) * scale; + correct_ngram_hit_counts = correct_ngram_hit_counts + (d.correct_ngram_hit_counts * scale); + hyp_ngram_counts = hyp_ngram_counts + (d.hyp_ngram_counts * scale); + ref_len = ref_len + (d.ref_len * scale); + hyp_len = hyp_len + (d.hyp_len * scale); } void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){ @@ -583,11 +589,11 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or ScoreP BLEUScore::GetZero() const { - return ScoreP(new BLEUScore(hyp_ngram_counts.size())); + return ScoreP(new BLEUScore(N())); } ScoreP BLEUScore::GetOne() const { - return ScoreP(new BLEUScore(hyp_ngram_counts.size(),1)); + return ScoreP(new BLEUScore(N(),1)); } diff --git a/vest/ter.cc b/vest/ter.cc index b4ebc4f5..8c8494ad 100644 --- a/vest/ter.cc +++ b/vest/ter.cc @@ -9,7 +9,7 @@ #include #include #include - +#include #include "tdict.h" const bool ter_use_average_ref_len = true; @@ -432,8 +432,12 @@ class TERScore : public Score { void ScoreDetails(string* details) const; void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} void PlusEquals(const Score& delta, const float scale) { - stats += static_cast(delta).stats; - } + if (scale==1) + stats += static_cast(delta).stats; + if (scale==-1) + stats -= static_cast(delta).stats; + throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); + } void PlusEquals(const Score& delta) { stats += static_cast(delta).stats; } -- cgit v1.2.3