summary | refs | log | tree | commit | diff
path: root/dtrain/score.cc
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2011-09-26 21:51:52 +0200
committer: Patrick Simianer <p@simianer.de> 2011-09-26 21:51:52 +0200
commit: 8bb00a2a2775442418f1cb7c041f7cba5d6e0d42 (patch)
tree: 7ac3d9f6941ef1f5142d009c485f2b21c33648de /dtrain/score.cc
parent: a433961e78a958d25a6d708bfd89e9655d1217c7 (diff)
got rid of scoring loop
Diffstat (limited to 'dtrain/score.cc')
-rw-r--r-- dtrain/score.cc 37
1 file changed, 13 insertions, 24 deletions
diff --git a/dtrain/score.cc b/dtrain/score.cc
index 9b22508b..93c4e80b 100644
--- a/dtrain/score.cc
+++ b/dtrain/score.cc
@@ -22,17 +22,17 @@ BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref
score_t sum = 0;
for (unsigned i = 0; i < M; i++) {
if (counts.clipped[i] == 0 || counts.sum[i] == 0) return 0;
- sum += w_[i] * log((score_t)counts.clipped[i] / counts.sum[i]);
+ sum += w_[i] * log((score_t)counts.clipped[i]/counts.sum[i]);
}
return brevity_penaly(hyp_len, ref_len) * exp(sum);
}
score_t
-BleuScorer::Score(ScoredHyp& hyp, vector<WordID>& ref_ids, unsigned id)
+BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref)
{
- unsigned hyp_len = hyp.w.size(), ref_len = ref_ids.size();
+ unsigned hyp_len = hyp.size(), ref_len = ref.size();
if (hyp_len == 0 || ref_len == 0) return 0;
- NgramCounts counts = make_ngram_counts(hyp.w, ref_ids, N_);
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_);
return Bleu(counts, hyp_len, ref_len);
}
@@ -47,30 +47,18 @@ BleuScorer::Score(ScoredHyp& hyp, vector<WordID>& ref_ids, unsigned id)
* NOTE: 0 iff no 1gram match
*/
score_t
-StupidBleuScorer::Score(ScoredHyp& hyp, vector<WordID>& ref_ids, unsigned id)
+StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref)
{
- unsigned hyp_len = hyp.w.size(), ref_len = ref_ids.size();
+ unsigned hyp_len = hyp.size(), ref_len = ref.size();
if (hyp_len == 0 || ref_len == 0) return 0;
- NgramCounts counts = make_ngram_counts(hyp.w, ref_ids, N_);
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_);
unsigned M = N_;
if (ref_len < N_) M = ref_len;
score_t sum = 0, add = 0;
for (unsigned i = 0; i < M; i++) {
if (i == 1) add = 1;
- //cout << ((score_t)counts.clipped[i] + add) << "/" << counts.sum[i] +add << "." << endl;
- //cout << "w_[i] " << w_[i] << endl;
- sum += w_[i] * log(((score_t)counts.clipped[i] + add) / ((counts.sum[i] + add)));
- //cout << "sum += "<< w_[i] * log(((score_t)counts.clipped[i] + add) / ((counts.sum[i] + add))) << endl;
+ sum += w_[i] * log(((score_t)counts.clipped[i] + add)/((counts.sum[i] + add)));
}
- /*cout << ref_ids << endl;
- cout << hyp.w << endl;
- cout << "ref_len " << ref_len << endl;
- cout << "hyp_len " << hyp_len << endl;
- cout << "bp " << brevity_penaly(hyp_len, ref_len) << endl;
- cout << "exp(sum) " << exp(sum) << endl;
- counts.Print();
- cout << brevity_penaly(hyp_len, ref_len) * exp(sum) << endl;
- cout << "---" << endl;*/
return brevity_penaly(hyp_len, ref_len) * exp(sum);
}
@@ -84,21 +72,22 @@ StupidBleuScorer::Score(ScoredHyp& hyp, vector<WordID>& ref_ids, unsigned id)
* NOTE: max is 0.9375
*/
score_t
-SmoothBleuScorer::Score(ScoredHyp& hyp, vector<WordID>& ref_ids, unsigned id)
+SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref)
{
- unsigned hyp_len = hyp.w.size(), ref_len = ref_ids.size();
+ unsigned hyp_len = hyp.size(), ref_len = ref.size();
if (hyp_len == 0 || ref_len == 0) return 0;
- NgramCounts counts = make_ngram_counts(hyp.w, ref_ids, N_);
+ NgramCounts counts = make_ngram_counts(hyp, ref, N_);
score_t sum = 0;
unsigned j = 1;
for (unsigned i = 0; i < N_; i++) {
if (counts.clipped[i] == 0 || counts.sum[i] == 0) continue;
- sum += exp((w_[i] * log((score_t)counts.clipped[i]/counts.sum[i]))) / pow(2, N_-j+1);
+ sum += exp((w_[i] * log((score_t)counts.clipped[i]/counts.sum[i])))/pow(2, N_-j+1);
j++;
}
return brevity_penaly(hyp_len, ref_len) * sum;
}
+// FIXME
/*
* approx. bleu
*