From 8aa29810bb77611cc20b7a384897ff6703783ea1 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 18 Nov 2012 13:35:42 -0500 Subject: major restructure of the training code --- dtrain/score.h | 212 --------------------------------------------------------- 1 file changed, 212 deletions(-) delete mode 100644 dtrain/score.h (limited to 'dtrain/score.h') diff --git a/dtrain/score.h b/dtrain/score.h deleted file mode 100644 index f317c903..00000000 --- a/dtrain/score.h +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef _DTRAIN_SCORE_H_ -#define _DTRAIN_SCORE_H_ - -#include "kbestget.h" - -using namespace std; - -namespace dtrain -{ - - -struct NgramCounts -{ - unsigned N_; - map clipped_; - map sum_; - - NgramCounts(const unsigned N) : N_(N) { Zero(); } - - inline void - operator+=(const NgramCounts& rhs) - { - if (rhs.N_ > N_) Resize(rhs.N_); - for (unsigned i = 0; i < N_; i++) { - this->clipped_[i] += rhs.clipped_.find(i)->second; - this->sum_[i] += rhs.sum_.find(i)->second; - } - } - - inline const NgramCounts - operator+(const NgramCounts &other) const - { - NgramCounts result = *this; - result += other; - return result; - } - - inline void - operator*=(const score_t rhs) - { - for (unsigned i = 0; i < N_; i++) { - this->clipped_[i] *= rhs; - this->sum_[i] *= rhs; - } - } - - inline void - Add(const unsigned count, const unsigned ref_count, const unsigned i) - { - assert(i < N_); - if (count > ref_count) { - clipped_[i] += ref_count; - } else { - clipped_[i] += count; - } - sum_[i] += count; - } - - inline void - Zero() - { - for (unsigned i = 0; i < N_; i++) { - clipped_[i] = 0.; - sum_[i] = 0.; - } - } - - inline void - One() - { - for (unsigned i = 0; i < N_; i++) { - clipped_[i] = 1.; - sum_[i] = 1.; - } - } - - inline void - Print() - { - for (unsigned i = 0; i < N_; i++) { - cout << i+1 << "grams (clipped):\t" << clipped_[i] << endl; - cout << i+1 << "grams:\t\t\t" << sum_[i] << endl; - } - } - - inline void Resize(unsigned N) - { - if (N == N_) return; - else if (N > N_) { - for (unsigned i = N_; i < N; i++) { - clipped_[i] = 0.; - sum_[i] = 0.; - } - } else { // N < N_ - for (unsigned i = N_-1; i > N-1; i--) { - clipped_.erase(i); - sum_.erase(i); - } - } - N_ = N; - } -}; - -typedef map, unsigned> Ngrams; - -inline Ngrams -make_ngrams(const vector& s, const unsigned N) -{ - Ngrams ngrams; - vector ng; - for (size_t i = 0; i < s.size(); i++) { - ng.clear(); - for (unsigned j = i; j < min(i+N, s.size()); j++) { - ng.push_back(s[j]); - ngrams[ng]++; - } - } - return ngrams; -} - -inline NgramCounts -make_ngram_counts(const vector& hyp, const vector& ref, const unsigned N) -{ - Ngrams hyp_ngrams = make_ngrams(hyp, N); - Ngrams ref_ngrams = make_ngrams(ref, N); - NgramCounts counts(N); - Ngrams::iterator it; - Ngrams::iterator ti; - for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { - ti = ref_ngrams.find(it->first); - if (ti != ref_ngrams.end()) { - counts.Add(it->second, ti->second, it->first.size() - 1); - } else { - counts.Add(it->second, 0, it->first.size() - 1); - } - } - return counts; -} - -struct BleuScorer : public LocalScorer -{ - score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len); - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct StupidBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SmoothBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SumBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SumExpBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct SumWhateverBleuScorer : public LocalScorer -{ - score_t Score(vector& hyp, vector& ref, const unsigned /*rank*/, const unsigned /*src_len*/); -}; - -struct ApproxBleuScorer : public BleuScorer -{ - NgramCounts glob_onebest_counts_; - unsigned glob_hyp_len_, glob_ref_len_, glob_src_len_; - score_t discount_; - - ApproxBleuScorer(unsigned N, score_t d) : glob_onebest_counts_(NgramCounts(N)), discount_(d) - { - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0; - } - - inline void Reset() { - glob_onebest_counts_.Zero(); - glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.; - } - - score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned src_len); -}; - -struct LinearBleuScorer : public BleuScorer -{ - unsigned onebest_len_; - NgramCounts onebest_counts_; - - LinearBleuScorer(unsigned N) : onebest_len_(1), onebest_counts_(N) - { - onebest_counts_.One(); - } - - score_t Score(vector& hyp, vector& ref, const unsigned rank, const unsigned /*src_len*/); - - inline void Reset() { - onebest_len_ = 1; - onebest_counts_.One(); - } -}; - - -} // namespace - -#endif - -- cgit v1.2.3