diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-10-20 15:21:54 +0100 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-10-20 15:21:54 +0100 |
commit | 95e9ea690b87f4648215782e820e177cbe17f18b (patch) | |
tree | 629b760b534d22c2c7b6ab326a4c6fa0bca563b7 /gi/pf/base_measures.cc | |
parent | 64318d1ef80df243b85d5bc89023dd2f14ed6806 (diff) |
bidir model1 base measure
Diffstat (limited to 'gi/pf/base_measures.cc')
-rw-r--r-- | gi/pf/base_measures.cc | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc index f8ddfd32..8adb37d7 100644 --- a/gi/pf/base_measures.cc +++ b/gi/pf/base_measures.cc @@ -89,6 +89,62 @@ prob_t PhraseJointBase::p0(const vector<WordID>& vsrc, return p; } +prob_t PhraseJointBase_BiDir::p0(const vector<WordID>& vsrc, + const vector<WordID>& vtrg, + int start_src, int start_trg) const { + const int flen = vsrc.size() - start_src; + const int elen = vtrg.size() - start_trg; + prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); + prob_t uniform_trg_alignment; uniform_trg_alignment.logeq(-log(elen + 1)); + + prob_t p1; + p1.logeq(log_poisson(flen, 1.0)); // flen ~Pois(1) + // elen | flen ~Pois(flen + 0.01) + prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01)); + p1 *= ptrglen; + p1 *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform + for (int i = 0; i < elen; ++i) { // for each position i in E + const WordID trg = vtrg[i + start_trg]; + prob_t tp = prob_t::Zero(); + for (int j = -1; j < flen; ++j) { + const WordID src = j < 0 ? 0 : vsrc[j + start_src]; + tp += kM1MIXTURE * model1(src, trg); + tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; + } + tp *= uniform_src_alignment; // draw a_i ~uniform + p1 *= tp; // draw e_i ~Model1(f_a_i) / uniform + } + if (p1.is_0()) { + cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; + abort(); + } + + prob_t p2; + p2.logeq(log_poisson(elen, 1.0)); // elen ~Pois(1) + // flen | elen ~Pois(flen + 0.01) + prob_t psrclen; psrclen.logeq(log_poisson(flen, elen + 0.01)); + p2 *= psrclen; + p2 *= kUNIFORM_TARGET.pow(elen); // each f in F ~Uniform + for (int i = 0; i < flen; ++i) { // for each position i in E + const WordID src = vsrc[i + start_src]; + prob_t tp = prob_t::Zero(); + for (int j = -1; j < elen; ++j) { + const WordID trg = j < 0 ? 0 : vtrg[j + start_trg]; + tp += kM1MIXTURE * invmodel1(trg, src); + tp += kUNIFORM_MIXTURE * kUNIFORM_SOURCE; + } + tp *= uniform_trg_alignment; // draw a_i ~uniform + p2 *= tp; // draw e_i ~Model1(f_a_i) / uniform + } + if (p2.is_0()) { + cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; + abort(); + } + + static const prob_t kHALF(0.5); + return (p1 + p2) * kHALF; +} + JumpBase::JumpBase() : p(200) { for (unsigned src_len = 1; src_len < 200; ++src_len) { map<int, prob_t>& cpd = p[src_len]; |