summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-10-23 18:03:33 +0000
committer: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-10-23 18:03:33 +0000
commit: 4e66b377ebb4b73d470c0efc573f5bda773b2972 (patch)
tree: 408a421454c25bf0d9c0e4314774c7824ca3bd08
parent: f09e46d9f15cbd33801d74058bcd7fd780daa047 (diff)
fix log prob / perplexity stats
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@691 ec762483-ff6d-05da-a07a-a48fb63a330f
-rw-r--r-- training/model1.cc | 7
1 file changed, 4 insertions, 3 deletions
diff --git a/training/model1.cc b/training/model1.cc
index 3e27689f..487ddb5f 100644
--- a/training/model1.cc
+++ b/training/model1.cc
@@ -46,8 +46,9 @@ int main(int argc, char** argv) {
assert(src.size() > 0);
assert(trg.size() > 0);
}
- denom += 1.0;
+ denom += trg.size();
vector<double> probs(src.size() + 1);
+ const double src_logprob = -log(src.size() + 1);
for (int j = 0; j < trg.size(); ++j) {
const WordID& f_j = trg[j][0].label;
double sum = 0;
@@ -79,12 +80,12 @@ int main(int argc, char** argv) {
for (int i = 1; i <= src.size(); ++i)
tt.Increment(src[i-1][0].label, f_j, probs[i] / sum);
}
- likelihood += log(sum);
+ likelihood += log(sum) + src_logprob;
}
}
if (flag) { cerr << endl; }
cerr << " log likelihood: " << likelihood << endl;
- cerr << " cross entopy: " << (-likelihood / denom) << endl;
+ cerr << " cross entropy: " << (-likelihood / denom) << endl;
cerr << " perplexity: " << pow(2.0, -likelihood / denom) << endl;
if (!final_iteration) tt.Normalize();
}