summary refs log tree commit diff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2012-03-15 22:47:04 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2012-03-15 22:47:04 -0400
commit 4a129e055387baf922f30f1502c6b6efad7dd8eb (patch)
tree 677620ce39d792b33dfb77f87de014b0b2bcd972
parent a45af4a3704531a8382cd231f6445b3a33b598a3 (diff)
bayes bayes bayes
-rw-r--r-- gi/pf/Makefile.am 7
-rw-r--r-- gi/pf/align-lexonly-pyp.cc 10
-rw-r--r-- gi/pf/hpyp_tm.cc 133
-rw-r--r-- gi/pf/hpyp_tm.h 38
-rw-r--r-- gi/pf/poisson_uniform_word_model.h 50
-rw-r--r-- gi/pf/pyp_tm.cc 11
-rw-r--r-- gi/pf/pyp_tm.h 7
-rw-r--r-- gi/pf/pyp_word_model.cc 20
-rw-r--r-- gi/pf/pyp_word_model.h 46
-rw-r--r-- gi/pf/quasi_model2.h 13
-rw-r--r-- gi/pf/tied_resampler.h 6
-rw-r--r-- utils/ccrp.h 4
-rw-r--r-- utils/mfcr.h 4
13 files changed, 288 insertions, 61 deletions
diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am
index f9c979d0..d365016b 100644
--- a/gi/pf/Makefile.am
+++ b/gi/pf/Makefile.am
@@ -1,8 +1,11 @@
-bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl
+bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl pf_test
noinst_LIBRARIES = libpf.a
-libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc pyp_word_model.cc pyp_tm.cc
+libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc corpus.cc unigrams.cc ngram_base.cc transliterations.cc backward.cc hpyp_tm.cc pyp_tm.cc
+
+pf_test_SOURCES = pf_test.cc
+pf_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
nuisance_test_SOURCES = nuisance_test.cc
nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index 942dcf51..e7509f57 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -11,6 +11,7 @@
#include "sampler.h"
#include "corpus.h"
#include "pyp_tm.h"
+#include "hpyp_tm.h"
#include "quasi_model2.h"
using namespace std;
@@ -61,15 +62,17 @@ struct AlignedSentencePair {
Array2D<short> posterior;
};
+template <class LexicalTranslationModel>
struct Aligner {
Aligner(const vector<vector<WordID> >& lets,
+ int vocab_size,
int num_letters,
const po::variables_map& conf,
vector<AlignedSentencePair>* c) :
corpus(*c),
paj_model(conf["align_alpha"].as<double>(), conf["p_null"].as<double>()),
infer_paj(conf.count("infer_alignment_hyperparameters") > 0),
- model(lets, num_letters),
+ model(lets, vocab_size, num_letters),
kNULL(TD::Convert("NULL")) {
assert(lets[kNULL].size() == 0);
}
@@ -77,7 +80,7 @@ struct Aligner {
vector<AlignedSentencePair>& corpus;
QuasiModel2 paj_model;
const bool infer_paj;
- PYPLexicalTranslation model;
+ LexicalTranslationModel model;
const WordID kNULL;
void ResampleHyperparameters() {
@@ -217,7 +220,8 @@ int main(int argc, char** argv) {
ExtractLetters(vocabf, &letters, NULL);
letters[TD::Convert("NULL")].clear();
- Aligner aligner(letters, letset.size(), conf, &corpus);
+ //Aligner<PYPLexicalTranslation> aligner(letters, vocabe.size(), letset.size(), conf, &corpus);
+ Aligner<HPYPLexicalTranslation> aligner(letters, vocabe.size(), letset.size(), conf, &corpus);
aligner.InitializeRandom();
const unsigned samples = conf["samples"].as<unsigned>();
diff --git a/gi/pf/hpyp_tm.cc b/gi/pf/hpyp_tm.cc
new file mode 100644
index 00000000..784f9958
--- /dev/null
+++ b/gi/pf/hpyp_tm.cc
@@ -0,0 +1,133 @@
+#include "hpyp_tm.h"
+
+#include <tr1/unordered_map>
+#include <iostream>
+#include <queue>
+
+#include "tdict.h"
+#include "ccrp.h"
+#include "pyp_word_model.h"
+#include "tied_resampler.h"
+
+using namespace std;
+using namespace std::tr1;
+
+struct FreqBinner { // wraps a FreqDict<unsigned> and exposes its lookups as bin ids
+ FreqBinner(const std::string& fname) { fd_.Load(fname); } // fills fd_ from the file named fname
+ unsigned NumberOfBins() const { return fd_.Max() + 1; } // fd_.Max() + 1; presumably bin ids run 0..Max() — confirm in freqdict.h
+ unsigned Bin(const WordID& w) const { return fd_.LookUp(w); } // bin id of w, exactly as returned by fd_.LookUp
+ FreqDict<unsigned> fd_; // table backing both queries above
+};
+
+template <typename Base, class Binner = FreqBinner>
+struct ConditionalPYPWordModel { // one CCRP over target spellings per source word, all backed by the same `base`
+ ConditionalPYPWordModel(Base* b, const Binner* bnr = NULL) : // b is kept as a reference; bnr may be NULL
+ base(*b),
+ binner(bnr),
+ btr(binner ? binner->NumberOfBins() + 1u : 2u) {} // covers the bin ids used in Increment: 0 for NULL plus Bin(src)+1, or just {0,1} without a binner
+
+ void Summary() const { // dumps every context's discount/strength and per-dish counts to cerr
+ cerr << "Number of conditioning contexts: " << r.size() << endl;
+ for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
+ cerr << TD::Convert(it->first) << " \tPYP(d=" << it->second.discount() << ",s=" << it->second.strength() << ") --------------------------" << endl;
+ for (CCRP<vector<WordID> >::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
+ cerr << " " << i2->second.total_dish_count_ << '\t' << TD::GetString(i2->first) << endl;
+ }
+ }
+
+ void ResampleHyperparameters(MT19937* rng) { // delegates to btr only; base is not resampled here
+ btr.ResampleHyperparameters(rng);
+ }
+
+ prob_t Prob(const WordID src, const vector<WordID>& trglets) const { // p(trglets | src)
+ RuleModelHash::const_iterator it = r.find(src);
+ if (it == r.end()) { // no CRP exists for src yet
+ return base(trglets); // so the base probability is returned unchanged
+ } else {
+ return it->second.prob(trglets, base(trglets)); // CRP probability with base(trglets) as its p0
+ }
+ }
+
+ void Increment(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
+ RuleModelHash::iterator it = r.find(src);
+ if (it == r.end()) { // first observation for src: create its CRP and register it with btr
+ it = r.insert(make_pair(src, CCRP<vector<WordID> >(0.5,1.0))).first; // 0.5, 1.0 are presumably discount and strength — confirm in ccrp.h
+ static const WordID kNULL = TD::Convert("NULL");
+ unsigned bin = (src == kNULL ? 0 : 1); // NULL always goes to bin 0
+ if (binner && bin) { bin = binner->Bin(src) + 1; } // other words are shifted up by one so bin 0 stays reserved
+ btr.Add(bin, &it->second); // btr stores a pointer to the CRP held inside r
+ }
+ if (it->second.increment(trglets, base(trglets), rng)) // true presumably means a new table was opened — confirm in ccrp.h
+ base.Increment(trglets, rng); // only then is the base told about the draw
+ }
+
+ void Decrement(const WordID src, const vector<WordID>& trglets, MT19937* rng) {
+ RuleModelHash::iterator it = r.find(src);
+ assert(it != r.end()); // src must have been seen by Increment before
+ if (it->second.decrement(trglets, rng)) { // mirrors Increment: base is updated only when the CRP reports true
+ base.Decrement(trglets, rng);
+ }
+ }
+
+ prob_t Likelihood() const { // product over contexts of exp(log_crp_prob()); base.Likelihood() is not included
+ prob_t p = prob_t::One();
+ for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
+ prob_t q; q.logeq(it->second.log_crp_prob());
+ p *= q;
+ }
+ return p;
+ }
+
+ unsigned UniqueConditioningContexts() const { // number of source words that currently have a CRP
+ return r.size();
+ }
+
+ // NOTE(review): btr already groups the CRPs by bin (see Increment), so the original TODO about tying hyperparameters by source-frequency bin may be stale — confirm
+ Base& base; // backoff distribution, not owned
+ const Binner* binner; // optional; NULL means only the NULL / non-NULL split is used
+ BinTiedResampler<CCRP<vector<WordID> > > btr; // receives (bin, CRP*) pairs and resamples their hyperparameters
+ typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash;
+ RuleModelHash r; // source word -> CRP over target spellings
+};
+
+HPYPLexicalTranslation::HPYPLexicalTranslation(const vector<vector<WordID> >& lets,
+ const unsigned vocab_size,
+ const unsigned num_letters) :
+ letters(lets), // lets is the table later indexed by target word id in Prob/Increment/Decrement
+ base(vocab_size, num_letters, 5), // three-argument construction of the base model; 5 is the last argument
+ up0(new PYPWordModel<PoissonUniformWordModel>(&base)), // PYP over spellings, backed by base
+ tmodel(new ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel> >(up0, new FreqBinner("10k.freq"))), // NOTE(review): up0, tmodel and the FreqBinner are heap-allocated and no delete appears in this file; "10k.freq" is a hard-coded relative path — confirm both are intended
+ kX(-TD::Convert("X")) {} // NOTE(review): kX is not read anywhere in this file
+
+void HPYPLexicalTranslation::Summary() const { // prints the translation model's summary, then the word model's
+ tmodel->Summary();
+ up0->Summary();
+}
+
+prob_t HPYPLexicalTranslation::Likelihood() const { // product of the two levels' likelihoods
+ prob_t p = up0->Likelihood(); // start from the word model's likelihood
+ p *= tmodel->Likelihood(); // multiply in the per-source-word CRPs
+ return p;
+}
+
+void HPYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) { // resamples both levels with the same rng
+ tmodel->ResampleHyperparameters(rng); // translation CRPs first
+ up0->ResampleHyperparameters(rng); // then the shared word model
+}
+
+unsigned HPYPLexicalTranslation::UniqueConditioningContexts() const { // forwards the translation model's context count
+ return tmodel->UniqueConditioningContexts();
+}
+
+prob_t HPYPLexicalTranslation::Prob(WordID src, WordID trg) const { // p(trg | src), scored on the spelling of trg
+ return tmodel->Prob(src, letters[trg]); // letters is indexed by trg with no bounds check
+}
+
+void HPYPLexicalTranslation::Increment(WordID src, WordID trg, MT19937* rng) { // records one (src, trg) observation
+ tmodel->Increment(src, letters[trg], rng); // the model sees trg as its letter sequence
+}
+
+void HPYPLexicalTranslation::Decrement(WordID src, WordID trg, MT19937* rng) { // removes one (src, trg) observation
+ tmodel->Decrement(src, letters[trg], rng); // same spelling lookup as Increment
+}
+
diff --git a/gi/pf/hpyp_tm.h b/gi/pf/hpyp_tm.h
new file mode 100644
index 00000000..af3215ba
--- /dev/null
+++ b/gi/pf/hpyp_tm.h
@@ -0,0 +1,38 @@
+#ifndef HPYP_LEX_TRANS
+#define HPYP_LEX_TRANS
+
+#include <vector>
+#include "wordid.h"
+#include "prob.h"
+#include "sampler.h"
+#include "freqdict.h"
+#include "poisson_uniform_word_model.h"
+
+struct FreqBinner;
+template <class B> struct PYPWordModel;
+template <typename T, class B> struct ConditionalPYPWordModel;
+
+struct HPYPLexicalTranslation { // lexical translation model whose members are declared below; definitions are not in this header
+ explicit HPYPLexicalTranslation(const std::vector<std::vector<WordID> >& lets,
+ const unsigned vocab_size,
+ const unsigned num_letters);
+
+ prob_t Likelihood() const;
+
+ void ResampleHyperparameters(MT19937* rng);
+ prob_t Prob(WordID src, WordID trg) const; // return p(trg | src)
+ void Summary() const;
+ void Increment(WordID src, WordID trg, MT19937* rng);
+ void Decrement(WordID src, WordID trg, MT19937* rng);
+ unsigned UniqueConditioningContexts() const;
+
+ private:
+ const std::vector<std::vector<WordID> >& letters; // spelling dictionary (held by reference, so the referent must outlive this object)
+ PoissonUniformWordModel base; // "generator" of English types
+ PYPWordModel<PoissonUniformWordModel>* up0; // model English lexicon; NOTE(review): raw pointer and no destructor is declared in this struct — confirm ownership
+ ConditionalPYPWordModel<PYPWordModel<PoissonUniformWordModel>, FreqBinner>* tmodel; // translation distributions
+ // (model English word | French word)
+ const WordID kX;
+};
+
+#endif
diff --git a/gi/pf/poisson_uniform_word_model.h b/gi/pf/poisson_uniform_word_model.h
new file mode 100644
index 00000000..76204a0e
--- /dev/null
+++ b/gi/pf/poisson_uniform_word_model.h
@@ -0,0 +1,50 @@
+#ifndef _POISSON_UNIFORM_WORD_MODEL_H_
+#define _POISSON_UNIFORM_WORD_MODEL_H_
+
+#include <cmath>
+#include <vector>
+#include "prob.h"
+#include "m.h"
+
+// len ~ Poisson(lambda), with lambda = mean_length
+// for (1..len)
+// e_i ~ Uniform over the alphabet (alphabet_size symbols)
+struct PoissonUniformWordModel { // NOTE(review): uses WordID and MT19937 but this header includes only <cmath>, <vector>, prob.h and m.h — confirm they arrive transitively
+ explicit PoissonUniformWordModel(const unsigned vocab_size,
+ const unsigned alphabet_size,
+ const double mean_len = 5) :
+ lh(prob_t::One()), // no draws recorded yet
+ v0(-std::log(vocab_size)), // NOTE(review): v0 is referenced only by the commented-out line in operator()
+ u0(-std::log(alphabet_size)),
+ mean_length(mean_len) {}
+
+ void ResampleHyperparameters(MT19937*) {} // intentionally empty: nothing is resampled
+
+ inline prob_t operator()(const std::vector<WordID>& s) const { // probability of the symbol sequence s
+ prob_t p;
+ p.logeq(Md::log_poisson(s.size(), mean_length) + s.size() * u0); // log p = log Poisson(|s|; mean_length) + |s| * u0
+ //p.logeq(v0);
+ return p;
+ }
+
+ inline void Increment(const std::vector<WordID>& w, MT19937*) { // records a draw of w; the rng argument is unused
+ lh *= (*this)(w);
+ }
+
+ inline void Decrement(const std::vector<WordID>& w, MT19937 *) { // undoes one recorded draw of w
+ lh /= (*this)(w);
+ }
+
+ inline prob_t Likelihood() const { return lh; } // running product maintained by Increment/Decrement
+
+ void Summary() const {} // prints nothing
+
+ private:
+
+ prob_t lh; // keeps track of the draws from the base distribution
+ const double v0; // uniform log prob of generating a word
+ const double u0; // uniform log prob of generating a letter
+ const double mean_length; // mean length of a word in the base distribution
+};
+
+#endif
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index e21f0267..6bc8a5bf 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -91,26 +91,23 @@ struct ConditionalPYPWordModel {
};
PYPLexicalTranslation::PYPLexicalTranslation(const vector<vector<WordID> >& lets,
+ const unsigned vocab_size, // new parameter, forwarded to base below
const unsigned num_letters) :
letters(lets),
- up0(new PYPWordModel(num_letters)),
- tmodel(new ConditionalPYPWordModel<PYPWordModel>(up0, new FreqBinner("10k.freq"))),
+ base(vocab_size, num_letters, 5), // replaces up0 as the distribution handed to tmodel
+ tmodel(new ConditionalPYPWordModel<PoissonUniformWordModel>(&base, new FreqBinner("10k.freq"))), // the per-source-word model now takes &base directly
kX(-TD::Convert("X")) {}
void PYPLexicalTranslation::Summary() const {
tmodel->Summary();
- up0->Summary();
}
prob_t PYPLexicalTranslation::Likelihood() const {
- prob_t p = up0->Likelihood();
- p *= tmodel->Likelihood();
- return p;
+ return tmodel->Likelihood() * base.Likelihood(); // product of the translation model's and the base model's likelihoods
}
void PYPLexicalTranslation::ResampleHyperparameters(MT19937* rng) {
tmodel->ResampleHyperparameters(rng);
- up0->ResampleHyperparameters(rng);
}
unsigned PYPLexicalTranslation::UniqueConditioningContexts() const {
diff --git a/gi/pf/pyp_tm.h b/gi/pf/pyp_tm.h
index 63e7c96d..2b076a25 100644
--- a/gi/pf/pyp_tm.h
+++ b/gi/pf/pyp_tm.h
@@ -6,13 +6,14 @@
#include "prob.h"
#include "sampler.h"
#include "freqdict.h"
+#include "poisson_uniform_word_model.h"
struct FreqBinner;
-struct PYPWordModel;
template <typename T, class B> struct ConditionalPYPWordModel;
struct PYPLexicalTranslation {
explicit PYPLexicalTranslation(const std::vector<std::vector<WordID> >& lets,
+ const unsigned vocab_size,
const unsigned num_letters);
prob_t Likelihood() const;
@@ -26,8 +27,8 @@ struct PYPLexicalTranslation {
private:
const std::vector<std::vector<WordID> >& letters; // spelling dictionary
- PYPWordModel* up0; // base distribuction (model English word)
- ConditionalPYPWordModel<PYPWordModel, FreqBinner>* tmodel; // translation distributions
+ PoissonUniformWordModel base; // "generator" of English types
+ ConditionalPYPWordModel<PoissonUniformWordModel, FreqBinner>* tmodel; // translation distributions
// (model English word | French word)
const WordID kX;
};
diff --git a/gi/pf/pyp_word_model.cc b/gi/pf/pyp_word_model.cc
deleted file mode 100644
index 12df4abf..00000000
--- a/gi/pf/pyp_word_model.cc
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "pyp_word_model.h"
-
-#include <iostream>
-
-using namespace std;
-
-void PYPWordModel::ResampleHyperparameters(MT19937* rng) {
- r.resample_hyperparameters(rng);
- cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n";
-}
-
-void PYPWordModel::Summary() const {
- cerr << "PYPWordModel: generations=" << r.num_customers()
- << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << endl;
- for (CCRP<vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it)
- cerr << " " << it->second.total_dish_count_
- << " (on " << it->second.table_counts_.size() << " tables) "
- << TD::GetString(it->first) << endl;
-}
-
diff --git a/gi/pf/pyp_word_model.h b/gi/pf/pyp_word_model.h
index ff366865..224a9034 100644
--- a/gi/pf/pyp_word_model.h
+++ b/gi/pf/pyp_word_model.h
@@ -11,48 +11,52 @@
#include "os_phrase.h"
// PYP(d,s,poisson-uniform) represented as a CRP
+template <class Base>
struct PYPWordModel {
- explicit PYPWordModel(const unsigned vocab_e_size, const double mean_len = 5) :
- base(prob_t::One()), r(1,1,1,1,0.66,50.0), u0(-std::log(vocab_e_size)), mean_length(mean_len) {}
-
- void ResampleHyperparameters(MT19937* rng);
+ explicit PYPWordModel(Base* b) : // b is dereferenced and kept as a reference
+ base(*b),
+ r(1,1,1,1,0.66,50.0)
+ {}
+
+ void ResampleHyperparameters(MT19937* rng) { // resamples r and logs the new values; base is not resampled here
+ r.resample_hyperparameters(rng);
+ std::cerr << " PYPWordModel(d=" << r.discount() << ",s=" << r.strength() << ")\n";
+ }
inline prob_t operator()(const std::vector<WordID>& s) const {
- return r.prob(s, p0(s));
+ return r.prob(s, base(s)); // base(s) serves as the p0 argument
}
inline void Increment(const std::vector<WordID>& s, MT19937* rng) {
- if (r.increment(s, p0(s), rng))
- base *= p0(s);
+ if (r.increment(s, base(s), rng)) // true presumably means a new table was opened — confirm in ccrp.h
+ base.Increment(s, rng); // only then is the draw passed on to base
}
inline void Decrement(const std::vector<WordID>& s, MT19937 *rng) {
if (r.decrement(s, rng))
- base /= p0(s);
+ base.Decrement(s, rng); // mirrors Increment
}
inline prob_t Likelihood() const {
prob_t p; p.logeq(r.log_crp_prob());
- p *= base;
+ p *= base.Likelihood(); // includes whatever likelihood the base model reports
return p;
}
- void Summary() const;
-
- private:
- inline double logp0(const std::vector<WordID>& s) const {
- return Md::log_poisson(s.size(), mean_length) + s.size() * u0;
+ void Summary() const { // writes customer count, hyperparameters and per-dish counts to std::cerr
+ std::cerr << "PYPWordModel: generations=" << r.num_customers()
+ << " PYP(d=" << r.discount() << ",s=" << r.strength() << ')' << std::endl;
+ for (typename CCRP<std::vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it) {
+ std::cerr << " " << it->second.total_dish_count_
+ << " (on " << it->second.table_counts_.size() << " tables) "
+ << TD::GetString(it->first) << std::endl;
+ }
}
- inline prob_t p0(const std::vector<WordID>& s) const {
- prob_t p; p.logeq(logp0(s));
- return p;
- }
+ private:
- prob_t base; // keeps track of the draws from the base distribution
+ Base& base; // underlying base distribution (by reference); it is told about draws via Increment/Decrement
CCRP<std::vector<WordID> > r;
- const double u0; // uniform log prob of generating a letter
- const double mean_length; // mean length of a word in the base distribution
};
#endif
diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h
index 588c8f84..4075affe 100644
--- a/gi/pf/quasi_model2.h
+++ b/gi/pf/quasi_model2.h
@@ -9,6 +9,7 @@
#include "array2d.h"
#include "slice_sampler.h"
#include "m.h"
+#include "have_64_bits.h"
struct AlignmentObservation {
AlignmentObservation() : src_len(), trg_len(), j(), a_j() {}
@@ -20,13 +21,31 @@ struct AlignmentObservation {
unsigned short a_j;
};
+#ifdef HAVE_64_BITS  // when defined, the object is hashed and compared through one size_t read
inline size_t hash_value(const AlignmentObservation& o) {
return reinterpret_cast<const size_t&>(o);
}
-
inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) {
return hash_value(a) == hash_value(b);
}
+#else
+// Portable path: fold the four fields into one hash value.
+inline size_t hash_value(const AlignmentObservation& o) {
+  size_t h = 1;
+  boost::hash_combine(h, o.src_len);
+  boost::hash_combine(h, o.trg_len);
+  boost::hash_combine(h, o.j);
+  boost::hash_combine(h, o.a_j);
+  return h;
+}
+// Equality has to compare the fields in this branch: a combined hash can
+// collide, so equal hashes do not imply equal observations, and without this
+// definition the branch would provide no operator== at all.
+inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) {
+  return a.src_len == b.src_len && a.trg_len == b.trg_len &&
+         a.j == b.j && a.a_j == b.a_j;
+}
+#endif
struct QuasiModel2 {
explicit QuasiModel2(double alpha, double pnull = 0.1) :
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
index 6f45fbce..a4f4af36 100644
--- a/gi/pf/tied_resampler.h
+++ b/gi/pf/tied_resampler.h
@@ -78,10 +78,8 @@ struct TiedResampler {
std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
std::cerr << "TiedCRPs(d=" << discount << ",s="
<< strength << ") = " << LogLikelihood(discount, strength) << std::endl;
- for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it) {
- (*it)->set_discount(discount);
- (*it)->set_strength(strength);
- }
+ for (typename std::set<CRP*>::iterator it = crps.begin(); it != crps.end(); ++it)
+ (*it)->set_hyperparameters(discount, strength);
}
private:
std::set<CRP*> crps;
diff --git a/utils/ccrp.h b/utils/ccrp.h
index 4a8b80e7..390d4994 100644
--- a/utils/ccrp.h
+++ b/utils/ccrp.h
@@ -55,6 +55,10 @@ class CCRP {
double discount() const { return discount_; }
double strength() const { return strength_; }
+ void set_hyperparameters(double d, double s) { // sets discount and strength together
+ discount_ = d; strength_ = s;
+ check_hyperparameters(); // runs once, after both values are in place
+ }
void set_discount(double d) { discount_ = d; check_hyperparameters(); }
void set_strength(double a) { strength_ = a; check_hyperparameters(); }
diff --git a/utils/mfcr.h b/utils/mfcr.h
index 886f01ef..4aacb567 100644
--- a/utils/mfcr.h
+++ b/utils/mfcr.h
@@ -73,6 +73,10 @@ class MFCR {
double discount() const { return discount_; }
double strength() const { return strength_; }
+ void set_hyperparameters(double d, double s) { // sets discount and strength together
+ discount_ = d; strength_ = s;
+ check_hyperparameters(); // runs once, after both values are in place
+ }
void set_discount(double d) { discount_ = d; check_hyperparameters(); }
void set_strength(double a) { strength_ = a; check_hyperparameters(); }