#ifndef _DTRAIN_SCORE_H_ #define _DTRAIN_SCORE_H_ #include #include #include #include #include #include "wordid.h" using namespace std; namespace dtrain { /* * NgramCounts * */ struct NgramCounts { NgramCounts( const size_t N ) : N_( N ) { reset(); } size_t N_; map clipped; map sum; void operator+=( const NgramCounts& rhs ) { assert( N_ == rhs.N_ ); for ( size_t i = 0; i < N_; i++ ) { this->clipped[i] += rhs.clipped.find(i)->second; this->sum[i] += rhs.sum.find(i)->second; } } const NgramCounts operator+( const NgramCounts &other ) const { NgramCounts result = *this; result += other; return result; } void add( size_t count, size_t ref_count, size_t i ) { assert( i < N_ ); if ( count > ref_count ) { clipped[i] += ref_count; sum[i] += count; } else { clipped[i] += count; sum[i] += count; } } void reset() { size_t i; for ( i = 0; i < N_; i++ ) { clipped[i] = 0; sum[i] = 0; } } void print() { for ( size_t i = 0; i < N_; i++ ) { cout << i+1 << "grams (clipped):\t" << clipped[i] << endl; cout << i+1 << "grams:\t\t\t" << sum[i] << endl; } } }; typedef map, size_t> Ngrams; Ngrams make_ngrams( vector& s, size_t N ); NgramCounts make_ngram_counts( vector hyp, vector ref, size_t N ); double brevity_penaly( const size_t hyp_len, const size_t ref_len ); double bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector weights = vector() ); double stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, size_t N, vector weights = vector() ); double smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector weights = vector() ); double approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N, vector weights = vector() ); } // namespace #endif