summary refs log tree commit diff
path: root/dtrain/score.h
diff options
context:
space:
mode:
Diffstat (limited to 'dtrain/score.h')
-rw-r--r-- dtrain/score.h 111
1 files changed, 111 insertions, 0 deletions
diff --git a/dtrain/score.h b/dtrain/score.h
new file mode 100644
index 00000000..e9130e18
--- /dev/null
+++ b/dtrain/score.h
@@ -0,0 +1,111 @@
+#ifndef _DTRAIN_SCORE_H_
+#define _DTRAIN_SCORE_H_
+
+
+#include <iostream>
+#include <vector>
+#include <map>
+#include <cassert>
+#include <cmath>
+
+#include "wordid.h"
+
+using namespace std;
+
+
+namespace dtrain
+{
+
+
+/*
+ * ScorePair
+ *
+ * Plain value pair holding two doubles: a model score and a score.
+ * Both fields are public. The accessors return copies of them and are
+ * const-qualified so they can be called through const objects and const
+ * references (for instance the elements of a const Scores container).
+ */
+struct ScorePair
+{
+  // Stores both values verbatim; no validation or normalisation is done.
+  ScorePair(double modelscore, double score) : modelscore_(modelscore), score_(score) {}
+  double modelscore_, score_;
+  // Returns the stored model score.
+  double GetModelScore() const { return modelscore_; }
+  // Returns the stored score.
+  double GetScore() const { return score_; }
+};
+
+// Sequence of ScorePair values.
+typedef vector<ScorePair> Scores;
+
+
+/*
+ * NgramCounts
+ *
+ * Two counters per n-gram order, stored under the zero-based keys 0..N_-1
+ * (print() labels key i as the (i+1)-grams):
+ *   clipped[i]  running total of min(count, ref_count) passed to add()
+ *   sum[i]      running total of count passed to add()
+ */
+struct NgramCounts
+{
+  // Creates the counters for N orders, all initialised to zero.
+  NgramCounts( const size_t N ) : N_( N ) {
+    reset();
+  }
+  size_t N_;
+  std::map<size_t, size_t> clipped;
+  std::map<size_t, size_t> sum;
+
+  // Adds the counters of rhs to this object, key by key.
+  // Both operands are expected to share the same N_ (asserted in debug
+  // builds). A key missing from rhs contributes 0 instead of dereferencing
+  // end(), so neither a build with the assert compiled out nor an externally
+  // modified map can trigger undefined behaviour here.
+  void
+  operator+=( const NgramCounts& rhs )
+  {
+    assert( N_ == rhs.N_ );
+    for ( size_t i = 0; i < N_; i++ ) {
+      clipped[i] += value_or_zero( rhs.clipped, i );
+      sum[i] += value_or_zero( rhs.sum, i );
+    }
+  }
+
+  // Records one observation for key i (i must be < N_):
+  // sum[i] grows by count, clipped[i] by count capped at ref_count.
+  void
+  add( size_t count, size_t ref_count, size_t i )
+  {
+    assert( i < N_ );
+    const size_t capped = ( count > ref_count ) ? ref_count : count;
+    clipped[i] += capped;
+    sum[i] += count;
+  }
+
+  // Sets both counters of every key 0..N_-1 to zero (creating missing keys).
+  void
+  reset()
+  {
+    for ( size_t i = 0; i < N_; i++ ) {
+      clipped[i] = 0;
+      sum[i] = 0;
+    }
+  }
+
+  // Writes both counters of every key to std::cout, one line per counter.
+  // Const: a missing key is printed as 0 and is not inserted into the map.
+  void
+  print() const
+  {
+    for ( size_t i = 0; i < N_; i++ ) {
+      std::cout << i+1 << "grams (clipped):\t" << value_or_zero( clipped, i ) << std::endl;
+      std::cout << i+1 << "grams:\t\t\t" << value_or_zero( sum, i ) << std::endl;
+    }
+  }
+
+private:
+  // Returns the value stored under key, or 0 when key is absent; never inserts.
+  static size_t
+  value_or_zero( const std::map<size_t, size_t>& m, const size_t key )
+  {
+    const std::map<size_t, size_t>::const_iterator it = m.find( key );
+    return ( it == m.end() ) ? 0 : it->second;
+  }
+};
+
+
+// Maps a sequence of WordID to a size_t value (presumably the occurrence count
+// of that n-gram -- the code that fills it is not part of this header; confirm
+// against the implementation file).
+typedef map<vector<WordID>, size_t> Ngrams;
+// Declared here, defined elsewhere. NOTE(review): `s` is taken by non-const
+// reference and `hyp`/`ref` by value (a copy per call); whether mutation or
+// copying is actually needed cannot be told from the declarations alone.
+Ngrams make_ngrams( vector<WordID>& s, size_t N );
+NgramCounts make_ngram_counts( vector<WordID> hyp, vector<WordID> ref, size_t N );
+
+// NOTE(review): the name looks like a misspelling of "brevity_penalty"; a
+// rename would have to be applied to the definition and every caller at once.
+double brevity_penaly( const size_t hyp_len, const size_t ref_len );
+// Scoring functions that share one parameter list; `weights` defaults to an
+// empty vector when omitted. Judging by the names these are BLEU variants, but
+// the formulas live in the definitions, which are not visible here.
+// stupid_bleu declares N without `const`; top-level const on a by-value
+// parameter is not part of the function's signature, so callers see no difference.
+double bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N,
+ vector<float> weights = vector<float>() );
+double stupid_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, size_t N,
+ vector<float> weights = vector<float>() );
+double smooth_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N,
+ vector<float> weights = vector<float>() );
+double approx_bleu( NgramCounts& counts, const size_t hyp_len, const size_t ref_len, const size_t N,
+ vector<float> weights = vector<float>() );
+
+
+} // namespace
+
+
+#endif
+