diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
commit | 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch) | |
tree | 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/pf/pyp_tm.cc | |
parent | cf9994131993b40be62e90e213b1e11e6b550143 (diff) | |
parent | 21825a09d97c2e0afd20512f306fb25fed55e529 (diff) |
Merge remote branch 'upstream/master'
Conflicts:
Jamroot
bjam
decoder/Jamfile
decoder/cdec.cc
dpmert/Jamfile
jam-files/sanity.jam
klm/lm/Jamfile
klm/util/Jamfile
mira/Jamfile
Diffstat (limited to 'gi/pf/pyp_tm.cc')
-rw-r--r-- | gi/pf/pyp_tm.cc | 128 |
1 files changed, 0 insertions, 128 deletions
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc deleted file mode 100644 index 37b9a604..00000000 --- a/gi/pf/pyp_tm.cc +++ /dev/null @@ -1,128 +0,0 @@ -#include "pyp_tm.h" - -#include <tr1/unordered_map> -#include <iostream> -#include <queue> - -#include "tdict.h" -#include "ccrp.h" -#include "pyp_word_model.h" -#include "tied_resampler.h" - -using namespace std; -using namespace std::tr1; - -struct FreqBinner { - FreqBinner(const std::string& fname) { fd_.Load(fname); } - unsigned NumberOfBins() const { return fd_.Max() + 1; } - unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } - FreqDict<unsigned> fd_; -}; - -template <typename Base, class Binner = FreqBinner> -struct ConditionalPYPWordModel { - ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : - base(*b), - binner(bnr), - btr(binner ? binner->NumberOfBins() + 1u : 2u) {} - - void Summary() const { - cerr << "Number of conditioning contexts: " << r.size() << endl; - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl; - for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2) - cerr << " " << i2->second << '\t' << TD::GetString(i2->first) << endl; - } - } - - void ResampleHyperparameters(MT19937* rng) { - btr.ResampleHyperparameters(rng); - } - - prob_t Prob(const WordID src, const vector<WordID>& trglets) const { - RuleModelHash::const_iterator it = r.find(src); - if (it == r.end()) { - return base(trglets); - } else { - return it->second.prob(trglets, base(trglets)); - } - } - - void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - if (it == r.end()) { - it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first; - static const WordID kNULL = TD::Convert("NULL"); - unsigned bin = (src == kNULL ? 0 : 1); - if (binner && bin) { bin = binner->Bin(src) + 1; } - btr.Add(bin, &it->second); - } - if (it->second.increment(trglets, base(trglets), rng)) - base.Increment(trglets, rng); - } - - void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) { - RuleModelHash::iterator it = r.find(src); - assert(it != r.end()); - if (it->second.decrement(trglets, rng)) { - base.Decrement(trglets, rng); - } - } - - prob_t Likelihood() const { - prob_t p = prob_t::One(); - for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) { - prob_t q; q.logeq(it->second.log_crp_prob()); - p *= q; - } - return p; - } - - unsigned UniqueConditioningContexts() const { - return r.size(); - } - - // TODO tie PYP hyperparameters based on source word frequency bins - Base& base; - const Binner* binner; - BinTiedResampler<CCRP<vector<WordID> > > btr; - typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash; - RuleModelHash r; -}; - -PYPLexicalTranslation::PYPLexicalTranslation(const vector<vector<WordID> >& lets, - const unsigned vocab_size, - const unsigned num_letters) : - letters(lets), - base(vocab_size, num_letters, 5), - tmodel(new ConditionalPYPWordModel<PoissonUniformWordModel>(&base, new FreqBinner("10k.freq"))), - kX(-TD::Convert("X")) {} - -void PYPLexicalTranslation::Summary() const { - tmodel->Summary(); -} - -prob_t PYPLexicalTranslation::Likelihood() const { - return tmodel->Likelihood() * base.Likelihood(); -} - -void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { - tmodel->ResampleHyperparameters(rng); -} - -unsigned PYPLexicalTranslation::UniqueConditioningContexts() const { - return tmodel->UniqueConditioningContexts(); -} - -prob_t PYPLexicalTranslation::Prob(WordID src, WordID trg) const { - return tmodel->Prob(src, letters[trg]); -} - -void PYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { - tmodel->Increment(src, letters[trg], rng); -} - -void PYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { - tmodel->Decrement(src, letters[trg], rng); -} - |