diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-23 18:03:33 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-10-23 18:03:33 +0000 |
commit | ad0e303c2c1c6176766efc6fce21d8f86aa3a329 (patch) | |
tree | 6afdd3e5ed9164ed158eef2a52d7fcbe40ff9c22 /training | |
parent | 437436a51fa51c66c3657a9d0809e42cf23bb6e2 (diff) |
fix log prob / perplexity stats
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@691 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'training')
-rw-r--r-- | training/model1.cc | 7 |
1 files changed, 4 insertions, 3 deletions
```diff
diff --git a/training/model1.cc b/training/model1.cc
index 3e27689f..487ddb5f 100644
--- a/training/model1.cc
+++ b/training/model1.cc
@@ -46,8 +46,9 @@ int main(int argc, char** argv) {
         assert(src.size() > 0);
         assert(trg.size() > 0);
       }
-      denom += 1.0;
+      denom += trg.size();
       vector<double> probs(src.size() + 1);
+      const double src_logprob = -log(src.size() + 1);
       for (int j = 0; j < trg.size(); ++j) {
         const WordID& f_j = trg[j][0].label;
         double sum = 0;
@@ -79,12 +80,12 @@ int main(int argc, char** argv) {
           for (int i = 1; i <= src.size(); ++i)
             tt.Increment(src[i-1][0].label, f_j, probs[i] / sum);
         }
-        likelihood += log(sum);
+        likelihood += log(sum) + src_logprob;
       }
     }
     if (flag) { cerr << endl; }
     cerr << " log likelihood: " << likelihood << endl;
-    cerr << " cross entopy: " << (-likelihood / denom) << endl;
+    cerr << " cross entropy: " << (-likelihood / denom) << endl;
     cerr << " perplexity: " << pow(2.0, -likelihood / denom) << endl;
     if (!final_iteration) tt.Normalize();
   }
```