From 2e605eb2745e56619b16fdbcb8095e0a6543ab27 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 3 Aug 2011 01:29:52 +0200 Subject: refactoring, cleaning up --- dtrain/score.h | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 dtrain/score.h (limited to 'dtrain/score.h') diff --git a/dtrain/score.h b/dtrain/score.h new file mode 100644 index 00000000..e9130e18 --- /dev/null +++ b/dtrain/score.h @@ -0,0 +1,111 @@ +#ifndef _DTRAIN_SCORE_H_ +#define _DTRAIN_SCORE_H_ + + +#include +#include +#include +#include +#include + +#include "wordid.h" + +using namespace std; + + +namespace dtrain +{ + + +/* + * ScorePair + * + */ +struct ScorePair +{ + ScorePair(double modelscore, double score) : modelscore_(modelscore), score_(score) {} + double modelscore_, score_; + double GetModelScore() { return modelscore_; } + double GetScore() { return score_; } +}; + +typedef vector Scores; + + +/* + * NgramCounts + * + */ +struct NgramCounts +{ + NgramCounts( const size_t N ) : N_( N ) { + reset(); + } + size_t N_; + map clipped; + map sum; + + void + operator+=( const NgramCounts& rhs ) + { + assert( N_ == rhs.N_ ); + for ( size_t i = 0; i < N_; i++ ) { + this->clipped[i] += rhs.clipped.find(i)->second; + this->sum[i] += rhs.sum.find(i)->second; + } + } + + void + add( size_t count, size_t ref_count, size_t i ) + { + assert( i < N_ ); + if ( count > ref_count ) { + clipped[i] += ref_count; + sum[i] += count; + } else { + clipped[i] += count; + sum[i] += count; + } + } + + void + reset() + { + size_t i; + for ( i = 0; i < N_; i++ ) { + clipped[i] = 0; + sum[i] = 0; + } + } + + void + print() + { + for ( size_t i = 0; i < N_; i++ ) { + cout << i+1 << "grams (clipped):\t" << clipped[i] << endl; + cout << i+1 << "grams:\t\t\t" << sum[i] << endl; + } + } +}; + + +typedef map, size_t> Ngrams; +Ngrams make_ngrams( vector& s, size_t N ); +NgramCounts make_ngram_counts( vector hyp, vector ref, size_t N ); + +double brevity_penaly( const size_t hyp_len, const size_t ref_len ); +double bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, + vector weights = vector() ); +double stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, size_t N, + vector weights = vector() ); +double smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, + vector weights = vector() ); +double approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, + vector weights = vector() ); + + +} // namespace + + +#endif + -- cgit v1.2.3