summaryrefslogtreecommitdiff
path: root/mteval/scorer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'mteval/scorer.cc')
-rw-r--r--mteval/scorer.cc630
1 files changed, 630 insertions, 0 deletions
diff --git a/mteval/scorer.cc b/mteval/scorer.cc
new file mode 100644
index 00000000..04eeaa93
--- /dev/null
+++ b/mteval/scorer.cc
@@ -0,0 +1,630 @@
+#include "scorer.h"
+
+#include <boost/lexical_cast.hpp>
+#include <map>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <cstdio>
+#include <valarray>
+#include <algorithm>
+
+#include <boost/shared_ptr.hpp>
+
+#include "filelib.h"
+#include "ter.h"
+#include "aer_scorer.h"
+#include "comb_scorer.h"
+#include "tdict.h"
+#include "stringlib.h"
+
+using boost::shared_ptr;
+using namespace std;
+
+// Fallback for score types that do not support scaling: reports the missing
+// implementation on stderr and aborts the process.
+void Score::TimesEquals(float /* scale */) {
+  cerr << "UNIMPLEMENTED except for BLEU (for MIRA): Score::TimesEquals" << endl;
+  abort();
+}
+
+// Translates a metric name (matched case-insensitively) into its ScoreType.
+// Unrecognised names are reported on stderr and mapped to IBM_BLEU.
+ScoreType ScoreTypeFromString(const string& st) {
+  struct NameToType {
+    const char* name;
+    ScoreType type;
+  };
+  static const NameToType kKnownNames[] = {
+    { "ser", SER },
+    { "ter", TER },
+    { "aer", AER },
+    { "bleu", IBM_BLEU },
+    { "ibm_bleu", IBM_BLEU },
+    { "ibm_bleu_3", IBM_BLEU_3 },
+    { "nist_bleu", NIST_BLEU },
+    { "koehn_bleu", Koehn_BLEU },
+    { "combi", BLEU_minus_TER_over_2 },
+  };
+  const string lowered = LowercaseString(st);
+  const size_t known = sizeof(kKnownNames) / sizeof(kKnownNames[0]);
+  for (size_t idx = 0; idx < known; ++idx) {
+    if (lowered == kKnownNames[idx].name)
+      return kKnownNames[idx].type;
+  }
+  cerr << "Don't understand score type '" << st << "', defaulting to ibm_bleu.\n";
+  return IBM_BLEU;
+}
+
+// Printable metric names, looked up by the integer value of a ScoreType.
+// NOTE(review): the order has to mirror the ScoreType enumerators, which are
+// declared outside this file -- confirm whenever either side changes.
+static char const* score_names[]={
+  "IBM_BLEU",
+  "NIST_BLEU",
+  "Koehn_BLEU",
+  "TER",
+  "BLEU_minus_TER_over_2",
+  "SER",
+  "AER",
+  "IBM_BLEU_3"
+};
+
+// Returns the printable name of `st`; asserts that `st` indexes into
+// score_names.
+std::string StringFromScoreType(ScoreType st) {
+  const int index = static_cast<int>(st);
+  assert(index >= 0 &&
+         static_cast<size_t>(index) < sizeof(score_names)/sizeof(score_names[0]));
+  return std::string(score_names[index]);
+}
+
+
+// Out-of-line definitions of the destructors of Score and SentenceScorer;
+// both bodies are empty.
+Score::~Score() {}
+SentenceScorer::~SentenceScorer() {}
+
+// Binary functor for accumulate(): adds the size() of the next element to the
+// running float total.
+struct length_accum {
+  template <class S>
+  float operator()(float sum, S const& ref) const {
+    sum += ref.size();
+    return sum;
+  }
+};
+
+// Returns the mean size() of the elements of `refs`, or 0 when `refs` is empty.
+// Takes the references by const reference so a call no longer copies every
+// reference sentence.
+template <class S>
+float avg_reflength(const vector<S>& refs) {
+  if (refs.empty())
+    return 0.;
+  return accumulate(refs.begin(), refs.end(), 0., length_accum()) / refs.size();
+}
+
+
+// Default reference length: simply the number of tokens in the hypothesis.
+// BLEUScorerBase declares its own pure-virtual ComputeRefLength further down.
+float SentenceScorer::ComputeRefLength(const Sentence &hyp) const {
+  const float hyp_tokens = hyp.size();  // reasonable default? :)
+  return hyp_tokens;
+}
+
+// Default implementation: this scorer exposes no source sentence, so the
+// result is always NULL.
+const std::string* SentenceScorer::GetSource() const {
+  return NULL;
+}
+
+// Tally produced by SERScorer: `correct` counts hypotheses that matched a
+// reference exactly, `total` counts hypotheses scored.
+class SERScore : public ScoreBase<SERScore> {
+  friend class SERScorer;
+ public:
+  SERScore() : correct(0), total(0) {}
+  // Partial scoring is not supported for this metric; always 0.
+  float ComputePartialScore() const { return 0.0;}
+  // Returns correct/total, or 0 for an empty tally (total == 0) instead of
+  // the NaN that 0/0 would produce.
+  float ComputeScore() const {
+    if (total == 0)
+      return 0.0f;
+    return static_cast<float>(correct) / static_cast<float>(total);
+  }
+  // Writes "SER= <score> (<correct>/<total>)" into *details.
+  void ScoreDetails(string* details) const {
+    ostringstream os;
+    os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')';
+    *details = os.str();
+  }
+  // No-op: there are no partial statistics to accumulate.
+  void PlusPartialEquals(const Score& /* delta */, int /* oracle_e_cover */, int /* oracle_f_cover */, int /* src_len */){}
+
+  // Adds `scale` times delta's counts. The sums are computed in float and
+  // truncated back to int on assignment.
+  void PlusEquals(const Score& delta, const float scale) {
+    const SERScore& d = static_cast<const SERScore&>(delta);
+    correct += scale*d.correct;
+    total += scale*d.total;
+  }
+  // Adds delta's counts; `delta` must be an SERScore.
+  void PlusEquals(const Score& delta) {
+    const SERScore& d = static_cast<const SERScore&>(delta);
+    correct += d.correct;
+    total += d.total;
+  }
+  ScoreP GetZero() const { return ScoreP(new SERScore); }
+  // NOTE(review): yields the same all-zero tally as GetZero() -- confirm that
+  // this is the intended "one" for SER.
+  ScoreP GetOne() const { return ScoreP(new SERScore); }
+  // Stores (*this - rhs) into *res; both must be SERScore objects.
+  void Subtract(const Score& rhs, Score* res) const {
+    const SERScore& d = static_cast<const SERScore&>(rhs);
+    SERScore* r = static_cast<SERScore*>(res);
+    r->correct = correct - d.correct;
+    r->total = total - d.total;
+  }
+  // Serialisation is not implemented; trips an assertion.
+  void Encode(string* /* out */) const {
+    assert(!"not implemented");
+  }
+  bool IsAdditiveIdentity() const {
+    return (total == 0 && correct == 0); // correct is always 0 <= n <= total
+  }
+ private:
+  int correct, total;
+};
+
+// Returns the scorer description followed by the text of the first reference.
+// A scorer built without references (Score::GetOne/GetZero create such
+// scorers) reports "<none>" instead of indexing an empty container.
+std::string SentenceScorer::verbose_desc() const {
+  if (refs.empty())
+    return desc+",ref0=<none>";
+  return desc+",ref0={ "+TD::GetString(refs[0])+" }";
+}
+
+// Exact-match scorer: a hypothesis counts as correct when it is identical to
+// at least one of the references.
+class SERScorer : public SentenceScorer {
+ public:
+  SERScorer(const vector<vector<WordID> >& references) : SentenceScorer("SERScorer",references),refs_(references) {}
+  // Not supported for this metric: always returns an empty ScoreP.
+  // NOTE(review): SentenceScorer::GetOne()/GetZero() dereference the result of
+  // ScoreCCandidate(), so they cannot be used on an SERScorer as written.
+  ScoreP ScoreCCandidate(const vector<WordID>& /* hyp */) const {
+    return ScoreP();
+  }
+  // Returns a tally with total == 1 and correct == 1 iff `hyp` equals one of
+  // the references.
+  ScoreP ScoreCandidate(const vector<WordID>& hyp) const {
+    SERScore* res = new SERScore;
+    res->total = 1;
+    for (size_t i = 0; i < refs_.size(); ++i) {
+      if (refs_[i] == hyp) {
+        res->correct = 1;
+        break;  // one exact match is enough
+      }
+    }
+    return ScoreP(res);
+  }
+  // Deserialisation is not implemented. The explicit return keeps the function
+  // well-defined when assertions are compiled out.
+  static ScoreP ScoreFromString(const string& /* data */) {
+    assert(!"Not implemented");
+    return ScoreP();
+  }
+ private:
+  vector<vector<WordID> > refs_;
+};
+
+// Sufficient statistics for BLEU: per-order counts of matched and of total
+// hypothesis n-grams, plus accumulated reference and hypothesis lengths.
+// Counts are stored as floats so that they can be scaled (see TimesEquals).
+class BLEUScore : public ScoreBase<BLEUScore> {
+ friend class BLEUScorerBase;
+ public:
+ // All-zero statistics for n-gram orders 1..n.
+ BLEUScore(int n) : correct_ngram_hit_counts(float(0),n), hyp_ngram_counts(float(0),n) {
+ ref_len = 0;
+ hyp_len = 0; }
+ // Statistics for orders 1..n with every count and both lengths set to k.
+ BLEUScore(int n, int k) : correct_ngram_hit_counts(float(k),n), hyp_ngram_counts(float(k),n) {
+ ref_len = k;
+ hyp_len = k; }
+ float ComputeScore() const;
+ float ComputePartialScore() const;
+ void ScoreDetails(string* details) const;
+ void TimesEquals(float scale);
+ void PlusEquals(const Score& delta);
+ void PlusEquals(const Score& delta, const float scale);
+ void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len);
+ ScoreP GetZero() const;
+ ScoreP GetOne() const;
+ void Subtract(const Score& rhs, Score* res) const;
+ void Encode(string* out) const;
+ // True when |ref_len| <= 0.1, hyp_len == 0 and every n-gram count is zero.
+ bool IsAdditiveIdentity() const {
+ if (fabs(ref_len) > 0.1f || hyp_len != 0) return false;
+ for (int i = 0; i < correct_ngram_hit_counts.size(); ++i)
+ if (hyp_ngram_counts[i] != 0 ||
+ correct_ngram_hit_counts[i] != 0) return false;
+ return true;
+ }
+ private:
+ // Maximum n-gram order tracked (the size of the count arrays).
+ int N() const {
+ return hyp_ngram_counts.size();
+ }
+ // Overloads that can also report per-order precisions and the brevity
+ // penalty through the optional out-parameters.
+ float ComputeScore(vector<float>* precs, float* bp) const;
+ float ComputePartialScore(vector<float>* prec, float* bp) const;
+ // Index i holds the statistics for (i+1)-grams.
+ valarray<float> correct_ngram_hit_counts;
+ valarray<float> hyp_ngram_counts;
+ float ref_len;
+ float hyp_len;
+};
+
+// Shared machinery for the BLEU variants in this file. It stores, for every
+// n-gram (orders 1..n_) seen in the references, the maximum count over the
+// references, and tallies hypothesis n-grams against that table. Subclasses
+// only decide which reference length to report.
+class BLEUScorerBase : public SentenceScorer {
+ public:
+  BLEUScorerBase(const vector<vector<WordID> >& references,
+                 int n
+                 );
+  // Scores `hyp` with matches clipped to the reference counts.
+  ScoreP ScoreCandidate(const vector<WordID>& hyp) const;
+  // Scores `hyp` without clipping.
+  ScoreP ScoreCCandidate(const vector<WordID>& hyp) const;
+  // Parses the text written by BLEUScore::Encode.
+  static ScoreP ScoreFromString(const string& in);
+
+  virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0;
+ private:
+  // Strict weak ordering on n-grams: lexicographic, with a proper prefix
+  // sorting before the longer sequence. Const-callable and bool-returning, as
+  // std::map requires of its comparison object; compares elements directly
+  // rather than by subtraction.
+  struct NGramCompare {
+    bool operator() (const vector<WordID>& a, const vector<WordID>& b) const {
+      const size_t as = a.size();
+      const size_t bs = b.size();
+      const size_t s = (as < bs ? as : bs);
+      for (size_t i = 0; i < s; ++i) {
+        if (a[i] < b[i]) return true;
+        if (b[i] < a[i]) return false;
+      }
+      return as < bs;
+    }
+  };
+  // n-gram -> (max count in any single reference, matches used so far by the
+  // hypothesis currently being scored).
+  typedef map<vector<WordID>, pair<int,int>, NGramCompare> NGramCountMap;
+
+  // Counts the n-grams of one reference and raises the stored maximum count
+  // of each n-gram where this reference has more occurrences.
+  void CountRef(const vector<WordID>& ref) {
+    NGramCountMap tc;
+    vector<WordID> ngram(n_);
+    int s = ref.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (n_ < remaining ? n_ : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(ref[j + i - 1]);
+        tc[ngram].first++;
+      }
+    }
+    for (NGramCountMap::iterator i = tc.begin(); i != tc.end(); ++i) {
+      pair<int,int>& p = ngrams_[i->first];
+      if (p.first < i->second.first)
+        p = i->second;
+    }
+  }
+
+  // Fills (*correct)[i-1] / (*hyp)[i-1] with the matched / total i-gram counts
+  // of `sent`. With clip_counts an n-gram is matched at most as often as its
+  // stored reference count; without it every hypothesis n-gram is counted as
+  // a match. The caller is expected to have reset the per-hypothesis match
+  // tallies (the .second fields) beforehand.
+  //
+  // Unseen n-grams are looked up with find() rather than operator[], so the
+  // table is not extended by hypothesis-only n-grams.
+  void ComputeNgramStats(const vector<WordID>& sent,
+                         valarray<float>* correct,
+                         valarray<float>* hyp,
+                         bool clip_counts)
+                         const {
+    assert(correct->size() == n_);
+    assert(hyp->size() == n_);
+    vector<WordID> ngram(n_);
+    (*correct) *= 0;
+    (*hyp) *= 0;
+    int s = sent.size();
+    for (int j=0; j<s; ++j) {
+      int remaining = s-j;
+      int k = (n_ < remaining ? n_ : remaining);
+      ngram.clear();
+      for (int i=1; i<=k; ++i) {
+        ngram.push_back(sent[j + i - 1]);
+        NGramCountMap::iterator found = ngrams_.find(ngram);
+        bool in_refs = false;
+        if (found != ngrams_.end()) {
+          pair<int,int>& p = found->second;
+          in_refs = (p.first != 0);
+          if (!clip_counts || p.second < p.first) {
+            ++p.second;
+            (*correct)[i-1]++;
+          }
+        } else if (!clip_counts) {
+          (*correct)[i-1]++;
+        }
+        // Once an n-gram is absent from the references no longer n-gram that
+        // starts at this position can match either: count the remaining
+        // orders as hypothesis n-grams and stop extending.
+        if (!in_refs) {
+          for (; i<=k; ++i)
+            (*hyp)[i-1]++;
+        } else {
+          (*hyp)[i-1]++;
+        }
+      }
+    }
+  }
+
+  // Mutable because the const scoring methods reset and update the
+  // per-hypothesis match tallies stored in it.
+  mutable NGramCountMap ngrams_;
+  int n_;
+  vector<int> lengths_;
+};
+
+// Rebuilds a BLEUScore from text of the form
+//   "<n> <ref_len> <hyp_len> <correct_1> <hyp_1> ... <correct_n> <hyp_n>"
+// (the format written by BLEUScore::Encode). If the order cannot be read or
+// is negative, the problem is reported on stderr and an order-0 score is
+// returned instead of sizing the arrays from an unusable value.
+ScoreP BLEUScorerBase::ScoreFromString(const string& in) {
+  istringstream is(in);
+  int n = 0;
+  is >> n;
+  if (!is || n < 0) {
+    cerr << "BLEUScorerBase::ScoreFromString: malformed score string '" << in << "'\n";
+    n = 0;
+  }
+  BLEUScore* r = new BLEUScore(n);
+  is >> r->ref_len >> r->hyp_len;
+
+  for (int i = 0; i < n; ++i) {
+    is >> r->correct_ngram_hit_counts[i];
+    is >> r->hyp_ngram_counts[i];
+  }
+  return ScoreP(r);
+}
+
+// BLEU variant whose reference length is the length of the reference closest
+// in size to the hypothesis.
+class IBM_BLEUScorer : public BLEUScorerBase {
+ public:
+  IBM_BLEUScorer(const vector<vector<WordID> >& references,
+                 int n=4)
+      : BLEUScorerBase(references, n), lengths_(references.size()) {
+    for (size_t r = 0; r < references.size(); ++r)
+      lengths_[r] = references[r].size();
+  }
+  // Returns the reference length with the smallest absolute difference to
+  // hyp.size(); on ties the earliest reference wins. Yields -1 when there are
+  // no references.
+  float ComputeRefLength(const vector<WordID>& hyp) const {
+    if (lengths_.size() == 1) return lengths_[0];
+    const int hyp_size = hyp.size();
+    int closest_gap = 2000000;
+    int closest_len = -1;
+    for (size_t r = 0; r < lengths_.size(); ++r) {
+      const int candidate = lengths_[r];
+      const int gap = abs(candidate - hyp_size);
+      if (gap < closest_gap) {
+        closest_gap = gap;
+        closest_len = candidate;
+      }
+    }
+    return closest_len;
+  }
+ private:
+  vector<int> lengths_;
+};
+
+// BLEU variant whose reference length is the length of the shortest
+// reference, regardless of the hypothesis.
+class NIST_BLEUScorer : public BLEUScorerBase {
+ public:
+  // Records the shortest reference length. An empty reference set (as passed
+  // by Score::GetOne/GetZero) yields 0 rather than reading references[0].
+  NIST_BLEUScorer(const vector<vector<WordID> >& references,
+                  int n=4) : BLEUScorerBase(references, n),
+                  shortest_(0) {
+    for (size_t i = 0; i < references.size(); ++i)
+      if (i == 0 || references[i].size() < shortest_)
+        shortest_ = references[i].size();
+  }
+  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
+    return shortest_;
+  }
+ private:
+  float shortest_;
+};
+
+// BLEU variant whose reference length is the average length of all
+// references, regardless of the hypothesis.
+class Koehn_BLEUScorer : public BLEUScorerBase {
+ public:
+  // Computes the mean reference length. With no references (as passed by
+  // Score::GetOne/GetZero) the average stays 0 instead of becoming 0/0.
+  Koehn_BLEUScorer(const vector<vector<WordID> >& references,
+                   int n=4) : BLEUScorerBase(references, n),
+                   avg_(0) {
+    for (size_t i = 0; i < references.size(); ++i)
+      avg_ += references[i].size();
+    if (!references.empty())
+      avg_ /= references.size();
+  }
+  float ComputeRefLength(const vector<WordID>& /* hyp */) const {
+    return avg_;
+  }
+ private:
+  float avg_;
+};
+
+// Factory: builds the scorer implementation that corresponds to `type` for the
+// given references. `src` is only forwarded to the AER scorer. An unknown type
+// trips an assertion and otherwise yields a ScorerP wrapping a null pointer.
+ScorerP SentenceScorer::CreateSentenceScorer(const ScoreType type,
+                                             const vector<vector<WordID> >& refs,
+                                             const string& src)
+{
+  SentenceScorer* scorer = 0;
+  switch (type) {
+    case IBM_BLEU:
+      scorer = new IBM_BLEUScorer(refs, 4);
+      break;
+    case IBM_BLEU_3:
+      scorer = new IBM_BLEUScorer(refs, 3);
+      break;
+    case NIST_BLEU:
+      scorer = new NIST_BLEUScorer(refs, 4);
+      break;
+    case Koehn_BLEU:
+      scorer = new Koehn_BLEUScorer(refs, 4);
+      break;
+    case AER:
+      scorer = new AERScorer(refs, src);
+      break;
+    case TER:
+      scorer = new TERScorer(refs);
+      break;
+    case SER:
+      scorer = new SERScorer(refs);
+      break;
+    case BLEU_minus_TER_over_2:
+      scorer = new BLEUTERCombinationScorer(refs);
+      break;
+    default:
+      assert(!"Not implemented!");
+  }
+  return ScorerP(scorer);
+}
+
+// Scores an empty hypothesis to obtain a score object of this scorer's type
+// and returns that object's GetOne().
+// NOTE(review): SERScorer::ScoreCCandidate returns an empty ScoreP, so this
+// dereferences a null pointer for SER scorers.
+ScoreP SentenceScorer::GetOne() const {
+  Sentence empty_hyp;
+  ScoreP prototype = ScoreCCandidate(empty_hyp);
+  return prototype->GetOne();
+}
+
+// Scores an empty hypothesis to obtain a score object of this scorer's type
+// and returns that object's GetZero().
+// NOTE(review): SERScorer::ScoreCCandidate returns an empty ScoreP, so this
+// dereferences a null pointer for SER scorers.
+ScoreP SentenceScorer::GetZero() const {
+  Sentence empty_hyp;
+  ScoreP prototype = ScoreCCandidate(empty_hyp);
+  return prototype->GetZero();
+}
+
+// Returns the "one" score of the given metric by building a throw-away scorer
+// with an empty reference set and asking it for GetOne().
+ScoreP Score::GetOne(ScoreType type) {
+  const std::vector<SentenceScorer::Sentence > no_refs;
+  ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, no_refs);
+  return scorer->GetOne();
+}
+
+// Returns the "zero" score of the given metric by building a throw-away scorer
+// with an empty reference set and asking it for GetZero().
+ScoreP Score::GetZero(ScoreType type) {
+  const std::vector<SentenceScorer::Sentence > no_refs;
+  ScorerP scorer = SentenceScorer::CreateSentenceScorer(type, no_refs);
+  return scorer->GetZero();
+}
+
+
+// Deserialises a score of the given metric by dispatching to the matching
+// scorer's ScoreFromString. An unknown type trips an assertion; when
+// assertions are compiled out an empty ScoreP is returned instead of flowing
+// off the end of the function.
+ScoreP SentenceScorer::CreateScoreFromString(const ScoreType type, const string& in) {
+  switch (type) {
+    case IBM_BLEU:
+    case IBM_BLEU_3:
+    case NIST_BLEU:
+    case Koehn_BLEU:
+      return BLEUScorerBase::ScoreFromString(in);
+    case TER:
+      return TERScorer::ScoreFromString(in);
+    case AER:
+      return AERScorer::ScoreFromString(in);
+    case SER:
+      return SERScorer::ScoreFromString(in);
+    case BLEU_minus_TER_over_2:
+      return BLEUTERCombinationScorer::ScoreFromString(in);
+    default:
+      break;
+  }
+  assert(!"Not implemented!");
+  return ScoreP();
+}
+
+// Writes a one-line summary into *details:
+//   "BLEU = <score>, <p1>|<p2>|<p3>|<p4> (brev=<penalty>)"
+// with the score and the precisions expressed as percentages.
+void BLEUScore::ScoreDetails(string* details) const {
+  vector<float> precs;
+  float bp;
+  const float bleu = ComputeScore(&precs, &bp);
+  // ComputeScore() clears the vector and appends one precision only for each
+  // order that has hypothesis n-grams, so it can hold fewer than four entries
+  // (even none). Pad with zeros before reading the four printed slots.
+  if (precs.size() < 4)
+    precs.resize(4, 0.f);
+  char buf[2000];
+  snprintf(buf, sizeof(buf), "BLEU = %.2f, %.1f|%.1f|%.1f|%.1f (brev=%.3f)",
+           bleu*100.0,
+           precs[0]*100.0,
+           precs[1]*100.0,
+           precs[2]*100.0,
+           precs[3]*100.0,
+           bp);
+  *details = buf;
+}
+
+// Computes BLEU from the accumulated statistics: the geometric mean of the
+// n-gram precisions over all orders that have hypothesis n-grams, multiplied
+// by the brevity penalty exp((hyp_len - ref_len) / hyp_len), which applies
+// when the hypothesis is shorter than the reference.
+//   precs - if non-NULL, cleared and filled with one precision per counted order
+//   bp    - if non-NULL, receives the brevity penalty
+// Returns 0 when no order has any hypothesis n-grams (previously 0/0 = NaN).
+float BLEUScore::ComputeScore(vector<float>* precs, float* bp) const {
+  if (precs) precs->clear();
+  float log_prec_sum = 0;
+  int orders_counted = 0;
+  for (int i = 0; i < N(); ++i) {
+    if (hyp_ngram_counts[i] > 0) {
+      float lprec = log(correct_ngram_hit_counts[i]) - log(hyp_ngram_counts[i]);
+      if (precs) precs->push_back(exp(lprec));
+      log_prec_sum += lprec;
+      ++orders_counted;
+    }
+  }
+  float lbp = 0.0;
+  if (hyp_len < ref_len)
+    lbp = (hyp_len - ref_len) / hyp_len;
+  if (bp) *bp = exp(lbp);
+  if (orders_counted == 0)
+    return 0.0f;
+  float log_bleu = log_prec_sum / static_cast<float>(orders_counted);
+  log_bleu += lbp;
+  return exp(log_bleu);
+}
+
+
+// Computes the score used for oracle retrieval. The statistics are already
+// scaled when they are accumulated (see PlusPartialEquals), and the formula
+// is the same as for the full score, so this forwards to ComputeScore rather
+// than carrying a second copy of it.
+//   precs - if non-NULL, receives the per-order precisions
+//   bp    - if non-NULL, receives the brevity penalty
+float BLEUScore::ComputePartialScore(vector<float>* precs, float* bp) const {
+  return ComputeScore(precs, bp);
+}
+
+// Partial score without the optional precision / brevity-penalty outputs.
+float BLEUScore::ComputePartialScore() const {
+  vector<float>* const no_precs = NULL;
+  float* const no_bp = NULL;
+  return ComputePartialScore(no_precs, no_bp);
+}
+
+// Full score without the optional precision / brevity-penalty outputs.
+float BLEUScore::ComputeScore() const {
+  vector<float>* const no_precs = NULL;
+  float* const no_bp = NULL;
+  return ComputeScore(no_precs, no_bp);
+}
+
+// Stores (*this - rhs) into *res, component by component. Both `rhs` and
+// `*res` must be BLEUScore objects.
+void BLEUScore::Subtract(const Score& rhs, Score* res) const {
+  const BLEUScore& other = static_cast<const BLEUScore&>(rhs);
+  BLEUScore* out = static_cast<BLEUScore*>(res);
+  out->correct_ngram_hit_counts = correct_ngram_hit_counts - other.correct_ngram_hit_counts;
+  out->hyp_ngram_counts = hyp_ngram_counts - other.hyp_ngram_counts;
+  out->ref_len = ref_len - other.ref_len;
+  out->hyp_len = hyp_len - other.hyp_len;
+}
+
+// Adds the statistics of `delta` (which must be a BLEUScore) to this score.
+void BLEUScore::PlusEquals(const Score& delta) {
+  const BLEUScore& other = static_cast<const BLEUScore&>(delta);
+  ref_len += other.ref_len;
+  hyp_len += other.hyp_len;
+  hyp_ngram_counts += other.hyp_ngram_counts;
+  correct_ngram_hit_counts += other.correct_ngram_hit_counts;
+}
+
+// Multiplies every statistic (both count arrays and both lengths) by `scale`.
+void BLEUScore::TimesEquals(float scale) {
+  ref_len *= scale;
+  hyp_len *= scale;
+  hyp_ngram_counts *= scale;
+  correct_ngram_hit_counts *= scale;
+}
+
+// Adds `scale` times the statistics of `delta` (which must be a BLEUScore) to
+// this score.
+void BLEUScore::PlusEquals(const Score& delta, const float scale) {
+  const BLEUScore& other = static_cast<const BLEUScore&>(delta);
+  correct_ngram_hit_counts += other.correct_ngram_hit_counts * scale;
+  hyp_ngram_counts += other.hyp_ngram_counts * scale;
+  ref_len += other.ref_len * scale;
+  hyp_len += other.hyp_len * scale;
+}
+
+// Accumulates `delta` (which must be a BLEUScore) for partial scoring. The
+// n-gram counts are added as usual; the lengths are handled differently:
+//   - ref_len is first scaled by oracle_f_cover / src_len, the share of the
+//     input sentence that is covered, and then delta's ref_len is added;
+//   - hyp_len is replaced by oracle_e_cover plus delta's hyp_len.
+void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){
+  const BLEUScore& other = static_cast<const BLEUScore&>(delta);
+  hyp_ngram_counts += other.hyp_ngram_counts;
+  correct_ngram_hit_counts += other.correct_ngram_hit_counts;
+
+  const float covered_share = static_cast<float>(oracle_f_cover) / src_len;
+  ref_len = ref_len * covered_share + other.ref_len;
+
+  hyp_len = oracle_e_cover + other.hyp_len;
+}
+
+
+// Returns a fresh all-zero score with the same maximum n-gram order.
+ScoreP BLEUScore::GetZero() const {
+  BLEUScore* zero = new BLEUScore(N());
+  return ScoreP(zero);
+}
+
+// Returns a fresh score of the same order whose counts and lengths are all 1.
+ScoreP BLEUScore::GetOne() const {
+  BLEUScore* one = new BLEUScore(N(), 1);
+  return ScoreP(one);
+}
+
+
+// Serialises the score into *out as space-separated text:
+//   "<n> <ref_len> <hyp_len> <correct_1> <hyp_1> ... <correct_n> <hyp_n>"
+void BLEUScore::Encode(string* out) const {
+  const int orders = correct_ngram_hit_counts.size();
+  ostringstream encoded;
+  encoded << orders;
+  encoded << ' ' << ref_len;
+  encoded << ' ' << hyp_len;
+  for (int order = 0; order < orders; ++order) {
+    encoded << ' ' << correct_ngram_hit_counts[order];
+    encoded << ' ' << hyp_ngram_counts[order];
+  }
+  *out = encoded.str();
+}
+
+// Names the scorer "BLEU<n>", records the length of every reference and adds
+// each reference's n-grams to the shared count table.
+BLEUScorerBase::BLEUScorerBase(const vector<vector<WordID> >& references,
+                               int n)
+    : SentenceScorer("BLEU"+boost::lexical_cast<string>(n),references),
+      n_(n) {
+  for (size_t r = 0; r < references.size(); ++r) {
+    const vector<WordID>& reference = references[r];
+    lengths_.push_back(reference.size());
+    CountRef(reference);
+  }
+}
+
+// Scores `hyp` with clipped n-gram matches. Although const, this resets and
+// updates the per-hypothesis match tallies kept in the mutable ngrams_ table.
+ScoreP BLEUScorerBase::ScoreCandidate(const vector<WordID>& hyp) const {
+  NGramCountMap::iterator entry = ngrams_.begin();
+  while (entry != ngrams_.end()) {
+    entry->second.second = 0;
+    ++entry;
+  }
+  BLEUScore* result = new BLEUScore(n_);
+  ComputeNgramStats(hyp,
+                    &result->correct_ngram_hit_counts,
+                    &result->hyp_ngram_counts,
+                    true);
+  result->ref_len = ComputeRefLength(hyp);
+  result->hyp_len = hyp.size();
+  return ScoreP(result);
+}
+
+// Scores `hyp` without clipping the n-gram matches. Although const, this
+// resets and updates the per-hypothesis match tallies kept in the mutable
+// ngrams_ table.
+ScoreP BLEUScorerBase::ScoreCCandidate(const vector<WordID>& hyp) const {
+  NGramCountMap::iterator entry = ngrams_.begin();
+  while (entry != ngrams_.end()) {
+    entry->second.second = 0;
+    ++entry;
+  }
+  const bool clip = false;
+  BLEUScore* result = new BLEUScore(n_);
+  ComputeNgramStats(hyp,
+                    &result->correct_ngram_hit_counts,
+                    &result->hyp_ngram_counts,
+                    clip);
+  result->ref_len = ComputeRefLength(hyp);
+  result->hyp_len = hyp.size();
+  return ScoreP(result);
+}
+
+
+// Out-of-line destructor definition; the body is empty.
+DocScorer::~DocScorer() {
+}
+
+// Builds one SentenceScorer per line of the reference files and stores them
+// in scorers_ (any previous content is discarded).
+//   type      - metric to create scorers for
+//   ref_files - one file per reference set; line k of every file holds the
+//               references for sentence k
+//   src_file  - source sentences; only read when type == AER and non-empty
+//   verbose   - log a description of every scorer to stderr
+// Reading stops at the end of the first reference file. Lines are read into a
+// fixed 64000-byte buffer.
+void DocScorer::Init(
+    const ScoreType type,
+    const vector<string>& ref_files,
+    const string& src_file, bool verbose) {
+  scorers_.clear();
+  // TODO stop using valarray, start using ReadFile
+  cerr << "Loading references (" << ref_files.size() << " files)\n";
+  if (ref_files.empty()) {
+    // Nothing to read; the loop below is driven by the first reference file.
+    cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+    return;
+  }
+  ReadFile srcrf;
+  if (type == AER && src_file.size() > 0) {
+    cerr << " (source=" << src_file << ")\n";
+    srcrf.Init(src_file);
+  }
+  std::vector<ReadFile> ifs(ref_files.begin(),ref_files.end());
+  for (size_t i=0; i < ref_files.size(); ++i) ifs[i].Init(ref_files[i]);
+  char buf[64000];
+  bool expect_eof = false;
+  int line=0;
+  while (ifs[0].get() && !expect_eof) {
+    vector<vector<WordID> > refs(ref_files.size());
+    for (size_t i=0; i < ref_files.size(); ++i) {
+      istream &in=ifs[i].get();
+      if (in.eof()) {
+        // The previous read already hit end-of-file, which happens when the
+        // last line has no trailing newline (only eofbit is set then, so the
+        // stream still tests true). For the first file this is the end of
+        // the input; without this the loop would never terminate.
+        if (i == 0) expect_eof = true;
+        break;
+      }
+      in.getline(buf, 64000);
+      refs[i].clear();
+      if (strlen(buf) == 0) {
+        if (in.eof()) {
+          if (!expect_eof) {
+            // End-of-file has to be seen on the first file first.
+            assert(i == 0);
+            expect_eof = true;
+          }
+          break;
+        }
+      } else {
+        TD::ConvertSentence(buf, &refs[i]);
+        assert(!refs[i].empty());
+      }
+      assert(!expect_eof);
+    }
+    if (!expect_eof) {
+      string src_line;
+      if (srcrf) {
+        getline(srcrf.get(), src_line);
+        map<string,string> dummy;
+        ProcessAndStripSGML(&src_line, &dummy);
+      }
+      scorers_.push_back(ScorerP(SentenceScorer::CreateSentenceScorer(type, refs, src_line)));
+      if (verbose)
+        cerr<<"doc_scorer["<<line<<"] = "<<scorers_.back()->verbose_desc()<<endl;
+      ++line;
+    }
+  }
+  cerr << "Loaded reference translations for " << scorers_.size() << " sentences.\n";
+}
+