From 190ad8ae1e131ac0e29ff975b0d6502f3cc57af6 Mon Sep 17 00:00:00 2001 From: graehl Date: Mon, 19 Jul 2010 23:40:30 +0000 Subject: fixed oracle generate, detailed score info with --verbose git-svn-id: https://ws10smt.googlecode.com/svn/trunk@329 ec762483-ff6d-05da-a07a-a48fb63a330f --- decoder/oracle_bleu.h | 2 +- decoder/sentences.h | 32 ++++++++++++++++++++++++++++---- vest/aer_scorer.cc | 6 +++--- vest/mr_vest_generate_mapper_input.cc | 27 +++++++++++++++++++-------- vest/scorer.cc | 30 ++++++++++++++++++------------ vest/ter.cc | 10 +++++++--- 6 files changed, 76 insertions(+), 31 deletions(-) diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index 470d311d..94548c18 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -115,7 +115,6 @@ struct OracleBleu { set_oracle_doc_size(doc_size); } - typedef boost::shared_ptr ScoreP; ScoreP doc_score,sentscore; // made from factory, so we delete them ScoreP GetScore(Sentence const& sentence,int sent_id) { return ScoreP(ds[sent_id]->ScoreCandidate(sentence)); @@ -185,6 +184,7 @@ struct OracleBleu { } // destroys forest (replaces it w/ rescored oracle one) + // sets sentscore Oracle ComputeOracle(SentenceMetadata const& smeta,Hypergraph *forest_in_out,WeightVector const& feature_weights,unsigned kbest=0,std::string const& forest_output="") { Hypergraph &forest=*forest_in_out; Oracle r; diff --git a/decoder/sentences.h b/decoder/sentences.h index 842072b9..622a6f43 100755 --- a/decoder/sentences.h +++ b/decoder/sentences.h @@ -9,6 +9,10 @@ #include "stringlib.h" typedef std::vector Sentence; +inline std::ostream & operator<<(std::ostream &out,Sentence const& s) { + return out< ss=SplitOnWhitespace(str); @@ -38,15 +42,35 @@ public: Sentences() { } Sentences(unsigned n,Sentence const& sentence) : VS(n,sentence) { } Sentences(unsigned n,std::string const& sentence) : VS(n,StringToSentence(sentence)) { } + std::string filename; void Load(std::string file) { ReadFile r(file); - Load(*r.stream()); + Load(r.get(),file); } - void Load(std::istream &in) { - this->push_back(Sentence()); - while(in>>this->back()) ; + void Load(std::istream &in,std::string filen="-") { + filename=filen; + do { + this->push_back(Sentence()); + } while(in>>this->back()); this->pop_back(); } + void Print(std::ostream &out,int headn=0) const { + out << "[" << size()<< " sentences from "<size(); + if (headn>0&&headn(delta); - num_matches += other.num_matches; - num_predicted += other.num_predicted; - num_in_ref += other.num_in_ref; + num_matches += scale*other.num_matches; + num_predicted += scale*other.num_predicted; + num_in_ref += scale*other.num_in_ref; } virtual void PlusEquals(const Score& delta) { const AERScore& other = static_cast(delta); diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index f66b5082..5ab5c689 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -183,20 +183,27 @@ struct oracle_directions { Sentences model_hyps; - vector model_scores; + vector model_scores; bool have_doc; void Init() { have_doc=!decoder_translations_file.empty(); if (have_doc) { model_hyps.Load(decoder_translations_file); + if (verbose) model_hyps.Print(cerr,5); model_scores.resize(model_hyps.size()); + if (dev_set_size!=model_hyps.size()) { + cerr<<"You supplied decoder_translations with a different number of lines ("<ScoreCandidate(model_hyps[i]); - if (verbose) cerr<<"Before model["<ScoreDetails()<PlusEquals(*model_scores[i]); - if (verbose) cerr<<"After model["<ScoreDetails()<PlusEquals(*model_scores[i]); + if (verbose) cerr<<"After model["<PlusEquals(*hopesc,-1); + cerr<<"Without hope: "<ScoreDetails()<* precs, float* bp) const; float ComputePartialScore(vector* prec, float* bp) const; valarray correct_ngram_hit_counts; @@ -475,10 +478,13 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface void BLEUScore::ScoreDetails(string* details) const { char buf[2000]; - vector precs(4); + vector precs(min(N(),4)); float bp; float bleu = ComputeScore(&precs, &bp); - sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + for (int i=N();i<4;++i) + precs[i]=0.; + char *bufn; + bufn=buf+sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", bleu*100.0, precs[0]*100.0, precs[1]*100.0, @@ -492,7 +498,7 @@ float BLEUScore::ComputeScore(vector* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; - for (int i = 0; i < hyp_ngram_counts.size(); ++i) { + for (int i = 0; i < N(); ++i) { if (hyp_ngram_counts[i] > 0) { float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); if (precs) precs->push_back(exp(lprec)); @@ -516,7 +522,7 @@ float BLEUScore::ComputePartialScore(vector* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; - for (int i = 0; i < hyp_ngram_counts.size(); ++i) { + for (int i = 0; i < N(); ++i) { // cerr << "In CPS " << hyp_ngram_counts[i] << " " << correct_ngram_hit_counts[i] << endl; if (hyp_ngram_counts[i] > 0) { float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); @@ -562,10 +568,10 @@ void BLEUScore::PlusEquals(const Score& delta) { void BLEUScore::PlusEquals(const Score& delta, const float scale) { const BLEUScore& d = static_cast(delta); - correct_ngram_hit_counts = (correct_ngram_hit_counts + d.correct_ngram_hit_counts) * scale; - hyp_ngram_counts = ( hyp_ngram_counts + d.hyp_ngram_counts) * scale; - ref_len = (ref_len + d.ref_len) * scale; - hyp_len = ( hyp_len + d.hyp_len) * scale; + correct_ngram_hit_counts = correct_ngram_hit_counts + (d.correct_ngram_hit_counts * scale); + hyp_ngram_counts = hyp_ngram_counts + (d.hyp_ngram_counts * scale); + ref_len = ref_len + (d.ref_len * scale); + hyp_len = hyp_len + (d.hyp_len * scale); } void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){ @@ -583,11 +589,11 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or ScoreP BLEUScore::GetZero() const { - return ScoreP(new BLEUScore(hyp_ngram_counts.size())); + return ScoreP(new BLEUScore(N())); } ScoreP BLEUScore::GetOne() const { - return ScoreP(new BLEUScore(hyp_ngram_counts.size(),1)); + return ScoreP(new BLEUScore(N(),1)); } diff --git a/vest/ter.cc b/vest/ter.cc index b4ebc4f5..8c8494ad 100644 --- a/vest/ter.cc +++ b/vest/ter.cc @@ -9,7 +9,7 @@ #include #include #include - +#include #include "tdict.h" const bool ter_use_average_ref_len = true; @@ -432,8 +432,12 @@ class TERScore : public Score { void ScoreDetails(string* details) const; void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} void PlusEquals(const Score& delta, const float scale) { - stats += static_cast(delta).stats; - } + if (scale==1) + stats += static_cast(delta).stats; + if (scale==-1) + stats -= static_cast(delta).stats; + throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); + } void PlusEquals(const Score& delta) { stats += static_cast(delta).stats; } -- cgit v1.2.3