summaryrefslogtreecommitdiff
path: root/gi/pf/hpyp_tm.cc
diff options
context:
space:
mode:
authorKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
committerKenneth Heafield <github@kheafield.com>2012-10-22 12:07:20 +0100
commitac586bc9b156b4ae687cd5961ba1fe7b20ec57d6 (patch)
tree052473b46d7fa18d51f897cdb9e7c93a7186dafd /gi/pf/hpyp_tm.cc
parent97b85c082b3e55c28a8b0c0eb762483ac84a1577 (diff)
parentad6d4a1b2519896f2b16a282699ce4e64041fab8 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'gi/pf/hpyp_tm.cc')
-rw-r--r--gi/pf/hpyp_tm.cc133
1 files changed, 0 insertions, 133 deletions
diff --git a/gi/pf/hpyp_tm.cc b/gi/pf/hpyp_tm.cc
deleted file mode 100644
index f362d3f8..00000000
--- a/gi/pf/hpyp_tm.cc
+++ /dev/null
@@ -1,133 +0,0 @@
-#include "hpyp_tm.h"
-
-#include <tr1/unordered_map>
-#include <iostream>
-#include <queue>
-
-#include "tdict.h"
-#include "ccrp.h"
-#include "pyp_word_model.h"
-#include "tied_resampler.h"
-
-using namespace std;
-using namespace std::tr1;
-
-struct FreqBinner {
- FreqBinner(const std::string& fname) { fd_.Load(fname); }
- unsigned NumberOfBins() const { return fd_.Max() + 1; }
- unsigned Bin(const WordID& w) const { return fd_.LookUp(w); }
- FreqDict<unsigned> fd_;
-};
-
-template <typename Base, class Binner = FreqBinner>
-struct ConditionalPYPWordModel {
- ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) :
- base(*b),
- binner(bnr),
- btr(binner ? binner->NumberOfBins() + 1u : 2u) {}
-
- void Summary() const {
- cerr << "Number of conditioning contexts: " << r.size() << endl;
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl;
- for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
- cerr << " " << i2->second << endl;
- }
- }
-
- void ResampleHyperparameters(MT19937* rng) {
- btr.ResampleHyperparameters(rng);
- }
-
- prob_t Prob(const WordID src, const vector<WordID>& trglets) const {
- RuleModelHash::const_iterator it = r.find(src);
- if (it == r.end()) {
- return base(trglets);
- } else {
- return it->second.prob(trglets, base(trglets));
- }
- }
-
- void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
- RuleModelHash::iterator it = r.find(src);
- if (it == r.end()) {
- it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first;
- static const WordID kNULL = TD::Convert("NULL");
- unsigned bin = (src == kNULL ? 0 : 1);
- if (binner && bin) { bin = binner->Bin(src) + 1; }
- btr.Add(bin, &it->second);
- }
- if (it->second.increment(trglets, base(trglets), rng))
- base.Increment(trglets, rng);
- }
-
- void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
- RuleModelHash::iterator it = r.find(src);
- assert(it != r.end());
- if (it->second.decrement(trglets, rng)) {
- base.Decrement(trglets, rng);
- }
- }
-
- prob_t Likelihood() const {
- prob_t p = prob_t::One();
- for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- prob_t q; q.logeq(it->second.log_crp_prob());
- p *= q;
- }
- return p;
- }
-
- unsigned UniqueConditioningContexts() const {
- return r.size();
- }
-
- // TODO tie PYP hyperparameters based on source word frequency bins
- Base& base;
- const Binner* binner;
- BinTiedResampler<CCRP<vector<WordID> > > btr;
- typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash;
- RuleModelHash r;
-};
-
-HPYPLexicalTranslation::HPYPLexicalTranslation(const vector<vector<WordID> >& lets,
- const unsigned vocab_size,
- const unsigned num_letters) :
- letters(lets),
- base(vocab_size, num_letters, 5),
- up0(new PYPWordModel<PoissonUniformWordModel>(&base)),
- tmodel(new ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel> >(up0, new FreqBinner("10k.freq"))),
- kX(-TD::Convert("X")) {}
-
-void HPYPLexicalTranslation::Summary() const {
- tmodel->Summary();
- up0->Summary();
-}
-
-prob_t HPYPLexicalTranslation::Likelihood() const {
- prob_t p = up0->Likelihood();
- p *= tmodel->Likelihood();
- return p;
-}
-
-void HPYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) {
- tmodel->ResampleHyperparameters(rng);
- up0->ResampleHyperparameters(rng);
-}
-
-unsigned HPYPLexicalTranslation::UniqueConditioningContexts() const {
- return tmodel->UniqueConditioningContexts();
-}
-
-prob_t HPYPLexicalTranslation::Prob(WordID src, WordID trg) const {
- return tmodel->Prob(src, letters[trg]);
-}
-
-void HPYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) {
- tmodel->Increment(src, letters[trg], rng);
-}
-
-void HPYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) {
- tmodel->Decrement(src, letters[trg], rng);
-}
-