diff options
author | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 03:50:05 +0000 |
---|---|---|
committer | graehl <graehl@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 03:50:05 +0000 |
commit | f819992b0b22b4fec88c15fe13118aa6b484b91b (patch) | |
tree | 1bf835e4b29ca926a4ca33a2a57743559c9ba58f /vest/scorer.cc | |
parent | c61c0f2f664eebcc434ce76e6767fccdbdf6fae2 (diff) |
oracle bleu refactor
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@259 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'vest/scorer.cc')
-rw-r--r-- | vest/scorer.cc | 48 |
1 files changed, 37 insertions, 11 deletions
diff --git a/vest/scorer.cc b/vest/scorer.cc index 524b15a5..8f981af6 100644 --- a/vest/scorer.cc +++ b/vest/scorer.cc @@ -6,6 +6,7 @@ #include <fstream> #include <cstdio> #include <valarray> +#include <algorithm> #include <boost/shared_ptr.hpp> @@ -47,8 +48,37 @@ ScoreType ScoreTypeFromString(const string& st) { return IBM_BLEU; } +static char const* score_names[]={ + "IBM_BLEU", "NIST_BLEU", "Koehn_BLEU", "TER", "BLEU_minus_TER_over_2", "SER", "AER", "IBM_BLEU_3" +}; + +std::string StringFromScoreType(ScoreType st) { + assert(st>=0 && st<sizeof(score_names)/sizeof(score_names[0])); + return score_names[(int)st]; +} + + Score::~Score() {} SentenceScorer::~SentenceScorer() {} + +struct length_accum { + template <class S> + float operator()(float sum,S const& ref) const { + return sum+ref.size(); + } +}; + +template <class S> +float avg_reflength(vector<S> refs) { + unsigned n=refs.size(); + return n?accumulate(refs.begin(),refs.end(),0.,length_accum())/n:0.; +} + + +float SentenceScorer::ComputeRefLength(const Sentence &hyp) const { + return hyp.size(); // reasonable default? :) +} + const std::string* SentenceScorer::GetSource() const { return NULL; } class SERScore : public Score { @@ -64,9 +94,9 @@ class SERScore : public Score { os << "SER= " << ComputeScore() << " (" << correct << '/' << total << ')'; *details = os.str(); } - void PlusPartialEquals(const Score& delta, int oracle_e_cover, int oracle_f_cover, int src_len){} - - void PlusEquals(const Score& delta, const float scale) { + void PlusPartialEquals(const Score& /* delta */, int /* oracle_e_cover */, int /* oracle_f_cover */, int /* src_len */){} + + void PlusEquals(const Score& delta, const float /* scale */) { correct += static_cast<const SERScore&>(delta).correct; total += static_cast<const SERScore&>(delta).total; } @@ -94,7 +124,7 @@ class SERScore : public Score { class SERScorer : public SentenceScorer { public: SERScorer(const vector<vector<WordID> >& references) : refs_(references) {} - Score* ScoreCCandidate(const vector<WordID>& hyp) const { + Score* ScoreCCandidate(const vector<WordID>& /* hyp */) const { Score* a = NULL; return a; } @@ -120,7 +150,7 @@ class BLEUScore : public Score { hyp_len = 0; } BLEUScore(int n, int k) : correct_ngram_hit_counts(float(k),float(n)), hyp_ngram_counts(float(k),float(n)) { ref_len = k; - hyp_len = k; } + hyp_len = k; } float ComputeScore() const; float ComputePartialScore() const; void ScoreDetails(string* details) const; @@ -156,7 +186,6 @@ class BLEUScorerBase : public SentenceScorer { Score* ScoreCCandidate(const vector<WordID>& hyp) const; static Score* ScoreFromString(const string& in); - protected: virtual float ComputeRefLength(const vector<WordID>& hyp) const = 0; private: struct NGramCompare { @@ -257,7 +286,6 @@ class IBM_BLEUScorer : public BLEUScorerBase { for (int i=0; i < references.size(); ++i) lengths_[i] = references[i].size(); } - protected: float ComputeRefLength(const vector<WordID>& hyp) const { if (lengths_.size() == 1) return lengths_[0]; int bestd = 2000000; @@ -285,7 +313,6 @@ class NIST_BLEUScorer : public BLEUScorerBase { if (references[i].size() < shortest_) shortest_ = references[i].size(); } - protected: float ComputeRefLength(const vector<WordID>& /* hyp */) const { return shortest_; } @@ -302,7 +329,6 @@ class Koehn_BLEUScorer : public BLEUScorerBase { avg_ += references[i].size(); avg_ /= references.size(); } - protected: float ComputeRefLength(const vector<WordID>& /* hyp */) const { return avg_; } @@ -520,10 +546,10 @@ void BLEUScore::PlusPartialEquals(const Score& delta, int oracle_e_cover, int or correct_ngram_hit_counts += d.correct_ngram_hit_counts; hyp_ngram_counts += d.hyp_ngram_counts; //scale the reference length according to the size of the input sentence covered by this rule - + ref_len *= (float)oracle_f_cover / src_len; ref_len += d.ref_len; - + hyp_len = oracle_e_cover; hyp_len += d.hyp_len; } |