diff options
| author | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 | 
|---|---|---|
| committer | Patrick Simianer <p@simianer.de> | 2012-03-13 09:24:47 +0100 | 
| commit | ef6085e558e26c8819f1735425761103021b6470 (patch) | |
| tree | 5cf70e4c48c64d838e1326b5a505c8c4061bff4a /gi/pf/quasi_model2.h | |
| parent | 10a232656a0c882b3b955d2bcfac138ce11e8a2e (diff) | |
| parent | dfbc278c1057555fda9312291c8024049e00b7d8 (diff) | |
merge with upstream
Diffstat (limited to 'gi/pf/quasi_model2.h')
| -rw-r--r-- | gi/pf/quasi_model2.h | 166 | 
1 files changed, 166 insertions, 0 deletions
| diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h new file mode 100644 index 00000000..588c8f84 --- /dev/null +++ b/gi/pf/quasi_model2.h @@ -0,0 +1,166 @@ +#ifndef _QUASI_MODEL2_H_ +#define _QUASI_MODEL2_H_ + +#include <vector> +#include <cmath> +#include <tr1/unordered_map> +#include "boost/functional.hpp" +#include "prob.h" +#include "array2d.h" +#include "slice_sampler.h" +#include "m.h" + +struct AlignmentObservation { +  AlignmentObservation() : src_len(), trg_len(), j(), a_j() {} +  AlignmentObservation(unsigned sl, unsigned tl, unsigned tw, unsigned sw) : +      src_len(sl), trg_len(tl), j(tw), a_j(sw) {} +  unsigned short src_len; +  unsigned short trg_len; +  unsigned short j; +  unsigned short a_j; +}; + +inline size_t hash_value(const AlignmentObservation& o) { +  return reinterpret_cast<const size_t&>(o); +} + +inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) { +  return hash_value(a) == hash_value(b); +} + +struct QuasiModel2 { +  explicit QuasiModel2(double alpha, double pnull = 0.1) : +      alpha_(alpha), +      pnull_(pnull), +      pnotnull_(1 - pnull) {} + +  // a_j = 0 => NULL; src_len does *not* include null +  prob_t Prob(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const { +    if (!a_j) return pnull_; +    return pnotnull_ * +       prob_t(UnnormalizedProb(a_j, j, src_len, trg_len, alpha_) / GetOrComputeZ(j, src_len, trg_len)); +  } + +  void Increment(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { +    assert(a_j <= src_len); +    assert(j < trg_len); +    ++obs_[AlignmentObservation(src_len, trg_len, j, a_j)]; +  } + +  void Decrement(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) { +    const AlignmentObservation ao(src_len, trg_len, j, a_j); +    int &cc = obs_[ao]; +    assert(cc > 0); +    --cc; +    if (!cc) obs_.erase(ao); +  } + +  struct PNullResampler { +    PNullResampler(const QuasiModel2& m) : m_(m) {} +    const QuasiModel2& m_; +    double operator()(const double& proposed_pnull) const { +      return log(m_.Likelihood(m_.alpha_, proposed_pnull)); +    } +  }; + +  struct AlphaResampler { +    AlphaResampler(const QuasiModel2& m) : m_(m) {} +    const QuasiModel2& m_; +    double operator()(const double& proposed_alpha) const { +      return log(m_.Likelihood(proposed_alpha, m_.pnull_.as_float())); +    } +  }; + +  void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) { +    const PNullResampler dr(*this); +    const AlphaResampler ar(*this); +    for (unsigned i = 0; i < nloop; ++i) { +      double pnull = slice_sampler1d(dr, pnull_.as_float(), *rng, 0.00000001, +                            1.0, 0.0, niterations, 100*niterations); +      pnull_ = prob_t(pnull); +      alpha_ = slice_sampler1d(ar, alpha_, *rng, 0.00000001, +                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); +    } +    std::cerr << "QuasiModel2(alpha=" << alpha_ << ",p_null=" +              << pnull_.as_float() << ") = " << Likelihood() << std::endl; +    zcache_.clear(); +  } + +  prob_t Likelihood() const { +    return Likelihood(alpha_, pnull_.as_float()); +  } + +  prob_t Likelihood(double alpha, double ppnull) const { +    const prob_t pnull(ppnull); +    const prob_t pnotnull(1 - ppnull); + +    prob_t p; +    p.logeq(Md::log_gamma_density(alpha, 0.1, 25));  // TODO configure +    assert(!p.is_0()); +    prob_t prob_of_ppnull; prob_of_ppnull.logeq(Md::log_beta_density(ppnull, 2, 10)); +    assert(!prob_of_ppnull.is_0()); +    p *= prob_of_ppnull; +    for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) { +      const AlignmentObservation& ao = it->first; +      if (ao.a_j) { +        prob_t u = XUnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha); +        prob_t z = XComputeZ(ao.j, ao.src_len, ao.trg_len, alpha); +        prob_t pa(u / z); +        pa *= pnotnull; +        pa.poweq(it->second); +        p *= pa; +      } else { +        p *= pnull.pow(it->second); +      } +    } +    return p; +  } + + private: +  static prob_t XUnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { +    prob_t p; +    p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); +    return p; +  } + +  static prob_t XComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { +    prob_t z = prob_t::Zero(); +    for (int a_j = 1; a_j <= src_len; ++a_j) +      z += XUnnormalizedProb(a_j, j, src_len, trg_len, alpha); +    return z; +  } + +  static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) { +    return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha); +  } + +  static double ComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) { +    double z = 0; +    for (int a_j = 1; a_j <= src_len; ++a_j) +      z += UnnormalizedProb(a_j, j, src_len, trg_len, alpha); +    return z; +  } + +  const double& GetOrComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const { +    if (src_len >= zcache_.size()) +      zcache_.resize(src_len + 1); +    if (trg_len >= zcache_[src_len].size()) +      zcache_[src_len].resize(trg_len + 1); +    std::vector<double>& zv = zcache_[src_len][trg_len]; +    if (zv.size() == 0) +      zv.resize(trg_len); +    double& z = zv[j]; +    if (!z) +      z = ComputeZ(j, src_len, trg_len, alpha_); +    return z; +  } + +  double alpha_; +  prob_t pnull_; +  prob_t pnotnull_; +  mutable std::vector<std::vector<std::vector<double> > > zcache_; +  typedef std::tr1::unordered_map<AlignmentObservation, int, boost::hash<AlignmentObservation> > ObsCount; +  ObsCount obs_; +}; + +#endif | 
