diff options
Diffstat (limited to 'mteval/ns_ter.cc')
-rw-r--r-- | mteval/ns_ter.cc | 126 |
1 files changed, 25 insertions, 101 deletions
diff --git a/mteval/ns_ter.cc b/mteval/ns_ter.cc index 14dc6e49..8c969e58 100644 --- a/mteval/ns_ter.cc +++ b/mteval/ns_ter.cc @@ -1,15 +1,11 @@ #include "ns_ter.h" -#include <cstdio> #include <cassert> #include <iostream> #include <limits> -#include <sstream> #include <tr1/unordered_map> #include <set> -#include <valarray> #include <boost/functional/hash.hpp> -#include <stdexcept> #include "tdict.h" static const bool ter_use_average_ref_len = true; @@ -25,7 +21,7 @@ static const unsigned kDUMMY_LAST_ENTRY = 5; using namespace std; using namespace std::tr1; -#if 0 +namespace NewScorer { struct COSTS { static const float substitution; @@ -82,7 +78,7 @@ class TERScorerImpl { enum TransType { MATCH, SUBSTITUTION, INSERTION, DELETION }; explicit TERScorerImpl(const vector<WordID>& ref) : ref_(ref) { - for (int i = 0; i < ref.size(); ++i) + for (unsigned i = 0; i < ref.size(); ++i) rwexists_.insert(ref[i]); } @@ -95,7 +91,7 @@ class TERScorerImpl { } private: - vector<WordID> ref_; + const vector<WordID>& ref_; set<WordID> rwexists_; typedef unordered_map<vector<WordID>, set<int>, boost::hash<vector<WordID> > > NgramToIntsMap; @@ -421,68 +417,7 @@ class TERScorerImpl { } }; -class TERScore : public ScoreBase<TERScore> { - friend class TERScorer; - - public: - - TERScore() : stats(0,kDUMMY_LAST_ENTRY) {} - float ComputePartialScore() const { return 0.0;} - float ComputeScore() const { - float edits = static_cast<float>(stats[kINSERTIONS] + stats[kDELETIONS] + stats[kSUBSTITUTIONS] + stats[kSHIFTS]); - return edits / static_cast<float>(stats[kREF_WORDCOUNT]); - } - void ScoreDetails(string* details) const; - void PlusPartialEquals(const Score& rhs, int oracle_e_cover, int oracle_f_cover, int src_len){} - void PlusEquals(const Score& delta, const float scale) { - if (scale==1) - stats += static_cast<const TERScore&>(delta).stats; - if (scale==-1) - stats -= static_cast<const TERScore&>(delta).stats; - throw std::runtime_error("TERScore::PlusEquals with scale != +-1"); - } - void PlusEquals(const Score& delta) { - stats += static_cast<const TERScore&>(delta).stats; - } - - ScoreP GetZero() const { - return ScoreP(new TERScore); - } - ScoreP GetOne() const { - return ScoreP(new TERScore); - } - void Subtract(const Score& rhs, Score* res) const { - static_cast<TERScore*>(res)->stats = stats - static_cast<const TERScore&>(rhs).stats; - } - void Encode(std::string* out) const { - ostringstream os; - os << stats[kINSERTIONS] << ' ' - << stats[kDELETIONS] << ' ' - << stats[kSUBSTITUTIONS] << ' ' - << stats[kSHIFTS] << ' ' - << stats[kREF_WORDCOUNT]; - *out = os.str(); - } - bool IsAdditiveIdentity() const { - for (int i = 0; i < kDUMMY_LAST_ENTRY; ++i) - if (stats[i] != 0) return false; - return true; - } - private: - valarray<int> stats; -}; - -ScoreP TERScorer::ScoreFromString(const std::string& data) { - istringstream is(data); - TERScore* r = new TERScore; - is >> r->stats[TERScore::kINSERTIONS] - >> r->stats[TERScore::kDELETIONS] - >> r->stats[TERScore::kSUBSTITUTIONS] - >> r->stats[TERScore::kSHIFTS] - >> r->stats[TERScore::kREF_WORDCOUNT]; - return ScoreP(r); -} - +#if 0 void TERScore::ScoreDetails(std::string* details) const { char buf[200]; sprintf(buf, "TER = %.2f, %3d|%3d|%3d|%3d (len=%d)", @@ -494,54 +429,43 @@ void TERScore::ScoreDetails(std::string* details) const { stats[kREF_WORDCOUNT]); *details = buf; } +#endif -TERScorer::~TERScorer() { - for (vector<TERScorerImpl*>::iterator i = impl_.begin(); i != impl_.end(); ++i) - delete *i; -} +} // namespace NewScorer -TERScorer::TERScorer(const vector<vector<WordID> >& refs) : impl_(refs.size()) { +void TERMetric::ComputeSufficientStatistics(const vector<WordID>& hyp, + const vector<vector<WordID> >& refs, + SufficientStats* out) const { + out->fields.resize(kDUMMY_LAST_ENTRY); + float best_score = numeric_limits<float>::max(); + unsigned avg_len = 0; for (int i = 0; i < refs.size(); ++i) - impl_[i] = new TERScorerImpl(refs[i]); -} + avg_len += refs[i].size(); + avg_len /= refs.size(); -ScoreP TERScorer::ScoreCCandidate(const vector<WordID>& hyp) const { - return ScoreP(); -} - -ScoreP TERScorer::ScoreCandidate(const std::vector<WordID>& hyp) const { - float best_score = numeric_limits<float>::max(); - TERScore* res = new TERScore; - int avg_len = 0; - for (int i = 0; i < impl_.size(); ++i) - avg_len += impl_[i]->GetRefLength(); - avg_len /= impl_.size(); - for (int i = 0; i < impl_.size(); ++i) { + for (int i = 0; i < refs.size(); ++i) { int subs, ins, dels, shifts; - float score = impl_[i]->Calculate(hyp, &subs, &ins, &dels, &shifts); + NewScorer::TERScorerImpl ter(refs[i]); + float score = ter.Calculate(hyp, &subs, &ins, &dels, &shifts); // cerr << "Component TER cost: " << score << endl; if (score < best_score) { - res->stats[TERScore::kINSERTIONS] = ins; - res->stats[TERScore::kDELETIONS] = dels; - res->stats[TERScore::kSUBSTITUTIONS] = subs; - res->stats[TERScore::kSHIFTS] = shifts; + out->fields[kINSERTIONS] = ins; + out->fields[kDELETIONS] = dels; + out->fields[kSUBSTITUTIONS] = subs; + out->fields[kSHIFTS] = shifts; if (ter_use_average_ref_len) { - res->stats[TERScore::kREF_WORDCOUNT] = avg_len; + out->fields[kREF_WORDCOUNT] = avg_len; } else { - res->stats[TERScore::kREF_WORDCOUNT] = impl_[i]->GetRefLength(); + out->fields[kREF_WORDCOUNT] = refs[i].size(); } best_score = score; } } - return ScoreP(res); } -#endif -void TERMetric::ComputeSufficientStatistics(const vector<WordID>& hyp, - const vector<vector<WordID> >& refs, - SufficientStats* out) const { - out->fields.resize(kDUMMY_LAST_ENTRY); +unsigned TERMetric::SufficientStatisticsVectorSize() const { + return kDUMMY_LAST_ENTRY; } float TERMetric::ComputeScore(const SufficientStats& stats) const { |