Diffstat (limited to 'gi')

 gi/pf/align-lexonly-pyp.cc | 13
 gi/pf/conditional_pseg.h   | 68
 gi/pf/pyp_lm.cc            | 12

 3 files changed, 66 insertions, 27 deletions
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index ac0590e0..13a3a487 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -68,14 +68,14 @@ struct AlignedSentencePair {
 
 struct HierarchicalWordBase {
   explicit HierarchicalWordBase(const unsigned vocab_e_size) :
-      base(prob_t::One()), r(1,1,1,1), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {}
+      base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-log(vocab_e_size)), l(1,prob_t::One()), v(1, prob_t::Zero()) {}
 
   void ResampleHyperparameters(MT19937* rng) {
     r.resample_hyperparameters(rng);
   }
 
   inline double logp0(const vector<WordID>& s) const {
-    return s.size() * u0;
+    return Md::log_poisson(s.size(), 7.5) + s.size() * u0;
   }
 
   // return p0 of rule.e_
@@ -106,7 +106,7 @@ struct HierarchicalWordBase {
   void Summary() const {
     cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << "  (d=" << r.discount() << ",s=" << r.strength() << ')' << endl;
     for (MFCR<1,vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it)
-      cerr << "   " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables)" << TD::GetString(it->first) << endl;
+      cerr << "   " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables) " << TD::GetString(it->first) << endl;
   }
 
   prob_t base;
@@ -167,10 +167,9 @@ struct BasicLexicalAlignment {
   }
 
   void ResampleHyperparemeters() {
-    cerr << "  LLH_prev = " << Likelihood() << flush;
     tmodel.ResampleHyperparameters(&*prng);
     up0.ResampleHyperparameters(&*prng);
-    cerr << "\tLLH_post = " << Likelihood() << endl;
+    cerr << "  (base d=" << up0.r.discount() << ",s=" << up0.r.strength() << ")\n";
   }
 
   void ResampleCorpus();
@@ -218,7 +217,7 @@ void BasicLexicalAlignment::ResampleCorpus() {
         up0.Increment(r);
     }
   }
-  cerr << "  LLH = " << tmodel.Likelihood() << endl;
+  cerr << "  LLH = " << Likelihood() << endl;
 }
 
 void ExtractLetters(const set<WordID>& v, vector<vector<WordID> >* l, set<WordID>* letset = NULL) {
@@ -311,7 +310,7 @@ int main(int argc, char** argv) {
   for (int i = 0; i < samples; ++i) {
     for (int j = 65; j < 67; ++j) Debug(corpus[j]);
     cerr << i << "\t" << x.tmodel.r.size() << "\t";
-    if (i % 10 == 0) x.ResampleHyperparemeters();
+    if (i % 7 == 6) x.ResampleHyperparemeters();
     x.ResampleCorpus();
     if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
   }
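Note on the logp0 change above: the old base scored a candidate word as a uniform draw per letter, p0(s) = |V_e|^{-|s|}, which sums to one at every length and is therefore improper over strings of unbounded length; multiplying in a Poisson term over lengths (mean 7.5) plausibly exists to make the base a proper distribution. A minimal sketch of what the new base computes, assuming Md::log_poisson is the standard Poisson log-density (the standalone helpers below are illustrative, not part of the codebase):

    #include <cmath>

    // Poisson log-density: log( lambda^k * e^(-lambda) / k! ).
    // Assumed to match Md::log_poisson used in the diff above.
    double log_poisson(unsigned k, double lambda) {
      return k * std::log(lambda) - lambda - std::lgamma(k + 1.0);
    }

    // The revised base: a Poisson(7.5) length penalty plus a uniform
    // per-letter term u0 = -log(vocab_e_size), all in log space.
    double logp0(unsigned len, double u0) {
      return log_poisson(len, 7.5) + len * u0;
    }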
     std::cerr << "   " << i2->second.total_dish_count_ << '\t' << i2->first << std::endl;      }    } +  double log_likelihood(const double& dd, const double& aa) const { +    if (aa <= -dd) return -std::numeric_limits<double>::infinity(); +    //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1); +    double llh = Md::log_beta_density(dd, 1, 1) + +                 Md::log_gamma_density(dd + aa, 1, 1); +    typename std::tr1::unordered_map<std::vector<WordID>, MFCR<1,TRule>, boost::hash<std::vector<WordID> > >::const_iterator it; +    for (it = r.begin(); it != r.end(); ++it) +      llh += it->second.log_crp_prob(dd, aa); +    return llh; +  } + +  struct DiscountResampler { +    DiscountResampler(const MConditionalTranslationModel& m) : m_(m) {} +    const MConditionalTranslationModel& m_; +    double operator()(const double& proposed_discount) const { +      return m_.log_likelihood(proposed_discount, m_.strength); +    } +  }; + +  struct AlphaResampler { +    AlphaResampler(const MConditionalTranslationModel& m) : m_(m) {} +    const MConditionalTranslationModel& m_; +    double operator()(const double& proposed_strength) const { +      return m_.log_likelihood(m_.d, proposed_strength); +    } +  }; +    void ResampleHyperparameters(MT19937* rng) { -    for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it) -      it->second.resample_hyperparameters(rng); -  }  +    const unsigned nloop = 5; +    const unsigned niterations = 10; +    DiscountResampler dr(*this); +    AlphaResampler ar(*this); +    for (int iter = 0; iter < nloop; ++iter) { +      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(), +                              std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); +      double min_discount = std::numeric_limits<double>::min(); +      if (strength < 0.0) min_discount -= strength; +      d = slice_sampler1d(dr, d, *rng, min_discount, +                          1.0, 0.0, niterations, 100*niterations); +    } +    strength = slice_sampler1d(ar, strength, *rng, -d, +                            std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations); +    typename std::tr1::unordered_map<std::vector<WordID>, MFCR<1,TRule>, boost::hash<std::vector<WordID> > >::iterator it; +    std::cerr << "MConditionalTranslationModel(d=" << d << ",s=" << strength << ") = " << log_likelihood(d, strength) << std::endl; +    for (it = r.begin(); it != r.end(); ++it) { +      it->second.set_discount(d); +      it->second.set_strength(strength); +    } +  }    int DecrementRule(const TRule& rule, MT19937* rng) {      RuleModelHash::iterator it = r.find(rule.f_); @@ -46,7 +91,7 @@ struct MConditionalTranslationModel {    int IncrementRule(const TRule& rule, MT19937* rng) {      RuleModelHash::iterator it = r.find(rule.f_);      if (it == r.end()) { -      it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(1.0, 1.0, 1.0, 1.0, 1e-9, 4.0))).first; +      it = r.insert(make_pair(rule.f_, MFCR<1,TRule>(d, strength))).first;      }      p0s[0] = rp0(rule);       TableCount delta = it->second.increment(rule, p0s.begin(), lambdas.begin(), rng); @@ -66,15 +111,7 @@ struct MConditionalTranslationModel {    }    prob_t Likelihood() const { -    prob_t p = prob_t::One(); -#if 0 -    for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { -      prob_t q; q.logeq(it->second.log_crp_prob()); -      p *= q; -      for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); 
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 7ebada13..104f356b 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -18,7 +18,7 @@
 
 // I use templates to handle the recursive formulation of the prior, so
 // the order of the model has to be specified here, at compile time:
-#define kORDER 3
+#define kORDER 4
 
 using namespace std;
 using namespace tr1;
@@ -114,7 +114,7 @@ template <unsigned N> struct PYPLM {
     if (aa <= -dd) return -std::numeric_limits<double>::infinity();
     //double llh = Md::log_beta_density(dd, 10, 3) + Md::log_gamma_density(aa, 1, 1);
     double llh = Md::log_beta_density(dd, discount_a, discount_b) +
-                 Md::log_gamma_density(aa, strength_s, strength_r);
+                 Md::log_gamma_density(aa + dd, strength_s, strength_r);
     typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
     for (it = p.begin(); it != p.end(); ++it)
       llh += it->second.log_crp_prob(dd, aa);
@@ -141,12 +141,14 @@ template <unsigned N> struct PYPLM {
     DiscountResampler dr(*this);
     AlphaResampler ar(*this);
     for (int iter = 0; iter < nloop; ++iter) {
-      strength = slice_sampler1d(ar, strength, *rng, 0.0,
+      strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
                               std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
-      d = slice_sampler1d(dr, d, *rng, std::numeric_limits<double>::min(),
+      double min_discount = std::numeric_limits<double>::min();
+      if (strength < 0.0) min_discount -= strength;
+      d = slice_sampler1d(dr, d, *rng, min_discount,
                           1.0, 0.0, niterations, 100*niterations);
     }
-    strength = slice_sampler1d(ar, strength, *rng, 0.0,
+    strength = slice_sampler1d(ar, strength, *rng, -d + std::numeric_limits<double>::min(),
                             std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
     typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it;
     cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << strength << ") = " << log_likelihood(d, strength) << endl;
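Note on the pyp_lm.cc changes above: kORDER rises from 3 to 4 (the compile-time n-gram order goes from trigram to 4-gram), the Gamma prior moves onto (strength + discount) exactly as in conditional_pseg.h, and the slice-sampler bounds widen from (0, inf) to (-d, inf) for the strength. A Pitman-Yor process is defined for 0 <= d < 1 with strength > -d, so once the current strength is negative the discount's lower bound must rise above -strength. The sketch below restates that support logic (the struct and names are illustrative, not part of the codebase):

    #include <limits>

    // Lower bounds used when slice-sampling PYP hyperparameters, keeping
    // the pair inside the admissible region 0 < d < 1, strength > -d.
    struct PYPSupport {
      // Strength may be negative, but only down to -d (exclusive).
      static double strength_lower_bound(double d) {
        return -d + std::numeric_limits<double>::min();
      }
      // If strength < 0, the discount must exceed -strength; otherwise any
      // positive discount is admissible.
      static double discount_lower_bound(double strength) {
        double lo = std::numeric_limits<double>::min();  // smallest positive double
        if (strength < 0.0) lo -= strength;              // i.e., lo = eps - strength
        return lo;
      }
    };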
