diff options
-rwxr-xr-x | decoder/oracle_bleu.h | 2 | ||||
-rwxr-xr-x | decoder/sentences.h | 32 | ||||
-rw-r--r-- | vest/aer_scorer.cc | 6 | ||||
-rw-r--r-- | vest/mr_vest_generate_mapper_input.cc | 27 | ||||
-rw-r--r-- | vest/scorer.cc | 30 | ||||
-rw-r--r-- | vest/ter.cc | 10 |
6 files changed, 76 insertions, 31 deletions
diff --git a/decoder/oracle_bleu.h b/decoder/oracle_bleu.h index 470d311d..94548c18 100755 --- a/decoder/oracle_bleu.h +++ b/decoder/oracle_bleu.h @@ -115,7 +115,6 @@ struct OracleBleu { set_oracle_doc_size(doc_size); } - typedef boost::shared_ptr<Score> ScoreP; ScoreP doc_score,sentscore; // made from factory, so we delete them ScoreP GetScore(Sentence const& sentence,int sent_id) { return ScoreP(ds[sent_id]->ScoreCandidate(sentence)); @@ -185,6 +184,7 @@ struct OracleBleu { } // destroys forest (replaces it w/ rescored oracle one) + // sets sentscore Oracle ComputeOracle(SentenceMetadata const& smeta,Hypergraph *forest_in_out,WeightVector const& feature_weights,unsigned kbest=0,std::string const& forest_output="") { Hypergraph &forest=*forest_in_out; Oracle r; diff --git a/decoder/sentences.h b/decoder/sentences.h index 842072b9..622a6f43 100755 --- a/decoder/sentences.h +++ b/decoder/sentences.h @@ -9,6 +9,10 @@ #include "stringlib.h" typedef std::vector<WordID> Sentence; +inline std::ostream & operator<<(std::ostream &out,Sentence const& s) { + return out<<TD::GetString(s); +} + inline void StringToSentence(std::string const& str,Sentence &s) { using namespace std; vector<string> ss=SplitOnWhitespace(str); @@ -38,15 +42,35 @@ public: Sentences() { } Sentences(unsigned n,Sentence const& sentence) : VS(n,sentence) { } Sentences(unsigned n,std::string const& sentence) : VS(n,StringToSentence(sentence)) { } + std::string filename; void Load(std::string file) { ReadFile r(file); - Load(*r.stream()); + Load(r.get(),file); } - void Load(std::istream &in) { - this->push_back(Sentence()); - while(in>>this->back()) ; + void Load(std::istream &in,std::string filen="-") { + filename=filen; + do { + this->push_back(Sentence()); + } while(in>>this->back()); this->pop_back(); } + void Print(std::ostream &out,int headn=0) const { + out << "[" << size()<< " sentences from "<<filename<<"]"; + if (headn!=0) { + int i=0,e=this->size(); + if (headn>0&&headn<e) { + e=headn; + out << " (first "<<headn<<")"; + } + out << " :\n"; + for (;i<e;++i) + out<<(*this)[i] << "\n"; + } + } + friend inline std::ostream& operator<<(std::ostream &out,Sentences const& s) { + s.Print(out); + return out; + } }; diff --git a/vest/aer_scorer.cc b/vest/aer_scorer.cc index 253076c5..81ffae76 100644 --- a/vest/aer_scorer.cc +++ b/vest/aer_scorer.cc @@ -18,9 +18,9 @@ class AERScore : public Score { virtual void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} virtual void PlusEquals(const Score& delta, const float scale) { const AERScore& other = static_cast<const AERScore&>(delta); - num_matches += other.num_matches; - num_predicted += other.num_predicted; - num_in_ref += other.num_in_ref; + num_matches += scale*other.num_matches; + num_predicted += scale*other.num_predicted; + num_in_ref += scale*other.num_in_ref; } virtual void PlusEquals(const Score& delta) { const AERScore& other = static_cast<const AERScore&>(delta); diff --git a/vest/mr_vest_generate_mapper_input.cc b/vest/mr_vest_generate_mapper_input.cc index f66b5082..5ab5c689 100644 --- a/vest/mr_vest_generate_mapper_input.cc +++ b/vest/mr_vest_generate_mapper_input.cc @@ -183,20 +183,27 @@ struct oracle_directions { Sentences model_hyps; - vector<OracleBleu::ScoreP> model_scores; + vector<ScoreP> model_scores; bool have_doc; void Init() { have_doc=!decoder_translations_file.empty(); if (have_doc) { model_hyps.Load(decoder_translations_file); + if (verbose) model_hyps.Print(cerr,5); model_scores.resize(model_hyps.size()); + if (dev_set_size!=model_hyps.size()) { + cerr<<"You supplied decoder_translations with a different number of lines ("<<model_hyps.size()<<") than dev_set_size ("<<dev_set_size<<")"<<endl; + abort(); + } + cerr << "Scoring model translations " << model_hyps << endl; for (int i=0;i<model_hyps.size();++i) { - //FIXME: what is scoreccand? with / without clipping? do without for consistency w/ oracle + //TODO: what is scoreCcand? without clipping? do without for consistency w/ oracle model_scores[i]=oracle.ds[i]->ScoreCandidate(model_hyps[i]); - if (verbose) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl; - if (verbose) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl; - oracle.doc_score->PlusEquals(*model_scores[i]); - if (verbose) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl; + assert(model_scores[i]); + if (verbose) cerr<<"Before model["<<i<<"]: "<<ds().ScoreDetails()<<endl; + if (verbose) cerr<<"model["<<i<<"]: "<<model_scores[i]->ScoreDetails()<<endl; + oracle.doc_score->PlusEquals(*model_scores[i]); + if (verbose) cerr<<"After model["<<i<<"]: "<<ds().ScoreDetails()<<endl; } //TODO: compute doc bleu stats for each sentence, then when getting oracle temporarily exclude stats for that sentence (skip regular score updating) } @@ -249,8 +256,12 @@ struct oracle_directions { o=oracle.ComputeOracle(oracle.MakeMetadata(hg,i),&hg,origin); if (verbose) { cerr << o; - cerr<<"After oracle: "<<ds().ScoreDetails()<<endl - <<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl + ScoreP hopesc=oracle.GetScore(o.hope.sentence,i); + oracle.doc_score->PlusEquals(*hopesc,1); + cerr<<"With hope: "<<ds().ScoreDetails()<<endl; + oracle.doc_score->PlusEquals(*hopesc,-1); + cerr<<"Without hope: "<<ds().ScoreDetails()<<endl; + cerr<<" oracle="<<oracle.GetScore(o.hope.sentence,i)->ScoreDetails()<<endl <<" model="<<oracle.GetScore(o.model.sentence,i)->ScoreDetails()<<endl; if (have_doc) cerr<<" doc (should = model): "<<model_scores[i]->ScoreDetails()<<endl; diff --git a/vest/scorer.cc b/vest/scorer.cc index 5cad948d..86894c32 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -150,10 +150,10 @@ class SERScorer : public SentenceScorer { class BLEUScore : public Score { friend class BLEUScorerBase; public: - BLEUScore(int n) : correct_ngram_hit_counts(float(0),float(n)), hyp_ngram_counts(float(0),float(n)) { + BLEUScore(int n) : correct_ngram_hit_counts(float(0),n), hyp_ngram_counts(float(0),n) { ref_len = 0; hyp_len = 0; } - BLEUScore(int n, int k) : correct_ngram_hit_counts(float(k),float(n)), hyp_ngram_counts(float(k),float(n)) { + BLEUScore(int n, int k) : correct_ngram_hit_counts(float(k),n), hyp_ngram_counts(float(k),n) { ref_len = k; hyp_len = k; } float ComputeScore() const; @@ -174,6 +174,9 @@ class BLEUScore : public Score { return true; } private: + int N() const { + return hyp_ngram_counts.size(); + } float ComputeScore(vector<float>* precs, float* bp) const; float ComputePartialScore(vector<float>* prec, float* bp) const; valarray<float> correct_ngram_hit_counts; @@ -475,10 +478,13 @@ void SentenceScorer::ComputeErrorSurface(const ViterbiEnvelope& ve, ErrorSurface void BLEUScore::ScoreDetails(string* details) const { char buf[2000]; - vector<float> precs(4); + vector<float> precs(min(N(),4)); float bp; float bleu = ComputeScore(&precs, &bp); - sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", + for (int i=N();i<4;++i) + precs[i]=0.; + char *bufn; + bufn=buf+sprintf(buf, "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)", bleu*100.0, precs[0]*100.0, precs[1]*100.0, @@ -492,7 +498,7 @@ float BLEUScore::ComputeScore(vector<float>* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; - for (int i = 0; i < hyp_ngram_counts.size(); ++i) { + for (int i = 0; i < N(); ++i) { if (hyp_ngram_counts[i] > 0) { float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); if (precs) precs->push_back(exp(lprec)); @@ -516,7 +522,7 @@ float BLEUScore::ComputePartialScore(vector<float>* precs, float* bp) const { float log_bleu = 0; if (precs) precs->clear(); int count = 0; - for (int i = 0; i < hyp_ngram_counts.size(); ++i) { + for (int i = 0; i < N(); ++i) { // cerr << "In CPS " << hyp_ngram_counts[i] << " " << correct_ngram_hit_counts[i] << endl; if (hyp_ngram_counts[i] > 0) { float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]); @@ -562,10 +568,10 @@ void BLEUScore::PlusEquals(const Score& delta) { void BLEUScore::PlusEquals(const Score& delta, const float scale) { const BLEUScore& d = static_cast<const BLEUScore&>(delta); - correct_ngram_hit_counts = (correct_ngram_hit_counts + d.correct_ngram_hit_counts) * scale; - hyp_ngram_counts = ( hyp_ngram_counts + d.hyp_ngram_counts) * scale; - ref_len = (ref_len + d.ref_len) * scale; - hyp_len = ( hyp_len + d.hyp_len) * scale; + correct_ngram_hit_counts = correct_ngram_hit_counts + (d.correct_ngram_hit_counts * scale); + hyp_ngram_counts = hyp_ngram_counts + (d.hyp_ngram_counts * scale); + ref_len = ref_len + (d.ref_len * scale); + hyp_len = hyp_len + (d.hyp_len * scale); } void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){ @@ -583,11 +589,11 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or ScoreP BLEUScore::GetZero() const { - return ScoreP(new BLEUScore(hyp_ngram_counts.size())); + return ScoreP(new BLEUScore(N())); } ScoreP BLEUScore::GetOne() const { - return ScoreP(new BLEUScore(hyp_ngram_counts.size(),1)); + return ScoreP(new BLEUScore(N(),1)); } diff --git a/vest/ter.cc b/vest/ter.cc index b4ebc4f5..8c8494ad 100644 --- a/vest/ter.cc +++ b/vest/ter.cc @@ -9,7 +9,7 @@ #include <set> #include <valarray> #include <boost/functional/hash.hpp> - +#include <stdexcept> #include "tdict.h" const bool ter_use_average_ref_len = true; @@ -432,8 +432,12 @@ class TERScore : public Score { void ScoreDetails(string* details) const; void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} void PlusEquals(const Score& delta, const float scale) { - stats += static_cast<const TERScore&>(delta).stats; - } + if (scale==1) + stats += static_cast<const TERScore&>(delta).stats; + if (scale==-1) + stats -= static_cast<const TERScore&>(delta).stats; + throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); + } void PlusEquals(const Score& delta) { stats += static_cast<const TERScore&>(delta).stats; } |