From ad0e303c2c1c6176766efc6fce21d8f86aa3a329 Mon Sep 17 00:00:00 2001
From: redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>
Date: Sat, 23 Oct 2010 18:03:33 +0000
Subject: fix log prob / perplexity stats

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@691 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 training/model1.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/training/model1.cc b/training/model1.cc
index 3e27689f..487ddb5f 100644
--- a/training/model1.cc
+++ b/training/model1.cc
@@ -46,8 +46,9 @@ int main(int argc, char** argv) {
         assert(src.size() > 0);
         assert(trg.size() > 0);
       }
-      denom += 1.0;
+      denom += trg.size();
       vector<double> probs(src.size() + 1);
+      const double src_logprob = -log(src.size() + 1);
       for (int j = 0; j < trg.size(); ++j) {
         const WordID& f_j = trg[j][0].label;
         double sum = 0;
@@ -79,12 +80,12 @@ int main(int argc, char** argv) {
           for (int i = 1; i <= src.size(); ++i)
             tt.Increment(src[i-1][0].label, f_j, probs[i] / sum);
         }
-        likelihood += log(sum);
+        likelihood += log(sum) + src_logprob;
       }
     }
     if (flag) { cerr << endl; }
     cerr << "  log likelihood: " << likelihood << endl;
-    cerr << "    cross entopy: " << (-likelihood / denom) << endl;
+    cerr << "   cross entropy: " << (-likelihood / denom) << endl;
     cerr << "      perplexity: " << pow(2.0, -likelihood / denom) << endl;
     if (!final_iteration) tt.Normalize();
   }
-- 
cgit v1.2.3