diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-12-01 00:03:35 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-12-01 00:03:35 +0000 |
commit | 74615686493ad495c8e7802c96e5257da7e7f934 (patch) | |
tree | 1a24ec2b4d320dbbb9e0bead833cf921ebc2a8eb /training/ttables.h | |
parent | 7ebf32cd42fb1ea3db33603a7585792189b06d4a (diff) |
optional variational bayes
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@734 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'training/ttables.h')
-rw-r--r-- | training/ttables.h | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/training/ttables.h b/training/ttables.h
index 53f5f2ab..50d85a68 100644
--- a/training/ttables.h
+++ b/training/ttables.h
@@ -6,6 +6,7 @@
 #include "wordid.h"
 #include "tdict.h"
+#include "em_utils.h"
 class TTable {
  public:
@@ -29,6 +30,19 @@ class TTable {
   inline void Increment(const int& e, const int& f, double x) {
     counts[e][f] += x;
   }
+  void NormalizeVB(const double alpha) {  // Variational-Bayes counterpart of Normalize(): rebuilds ttable from counts, adding alpha to every stored count.
+    ttable.swap(counts);  // ttable now holds the accumulated counts (rewritten in place below); counts holds the previous table.
+    for (Word2Word2Double::iterator cit = ttable.begin();
+         cit != ttable.end(); ++cit) {  // One pass per outer key; each inner map is processed independently.
+      double tot = 0;  // Row total: sum of (count + alpha) over the entries stored in this row.
+      Word2Double& cpd = cit->second;  // Reference, so the writes below modify ttable directly.
+      for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it)
+        tot += it->second + alpha;  // alpha is added once per stored entry only; keys absent from the row contribute nothing.
+      for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it)
+        it->second = exp(digamma(it->second + alpha) - digamma(tot));  // Replaces the count with exp(digamma(count + alpha) - digamma(tot)). NOTE(review): digamma presumably comes from em_utils.h - confirm.
+    }
+    counts.clear();  // Discards the previous table that swap() moved into counts, leaving counts empty.
+  }
   void Normalize() {
     ttable.swap(counts);
     for (Word2Word2Double::iterator cit = ttable.begin();
          cit != ttable.end(); ++cit) { |