author     Chris Dyer <cdyer@cs.cmu.edu>   2012-03-03 17:16:58 -0500
committer  Chris Dyer <cdyer@cs.cmu.edu>   2012-03-03 17:16:58 -0500
commit     2579dd24d3833823527e688196276c2fab381b37 (patch)
tree       df25825f29db546549fc469f912cef5a7e32c08f /gi
parent     e0507d1aa96c6b1348e6a202beb95f63d8662258 (diff)
pyp lm, fixed hyperparameter inference
Diffstat (limited to 'gi')
-rw-r--r--  gi/pf/align-lexonly-pyp.cc |  2
-rw-r--r--  gi/pf/align-lexonly.cc     |  2
-rw-r--r--  gi/pf/brat.cc              |  2
-rw-r--r--  gi/pf/conditional_pseg.h   |  4
-rw-r--r--  gi/pf/learn_cfg.cc         |  4
-rw-r--r--  gi/pf/pfbrat.cc            |  2
-rw-r--r--  gi/pf/pyp_lm.cc            | 70
7 files changed, 72 insertions(+), 14 deletions(-)
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index e24cb457..4ce7cf62 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -104,7 +104,7 @@ struct HierarchicalWordBase {
}
void Summary() const {
- cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.d() << ",\\alpha=" << r.alpha() << ')' << endl;
+ cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (d=" << r.discount() << ",\\alpha=" << r.alpha() << ')' << endl;
for (MFCR<vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it)
cerr << " " << it->second.total_dish_count_ << " (on " << it->second.table_counts_.size() << " tables)" << TD::GetString(it->first) << endl;
}
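
For reference, discount() and alpha() name the two Pitman-Yor CRP hyperparameters that the renamed accessors expose. A minimal sketch of the predictive probability they parameterize, assuming per-dish customer and table counts are tracked; the names below are illustrative, not the MFCR API:

// Sketch: Pitman-Yor CRP predictive probability. Assumes we can read off,
// for dish w, its customer count c_w and table count t_w, plus the totals
// c and T over all dishes. Illustrative names, not accessors of the
// classes in this diff.
double pyp_prob(double d, double alpha,   // discount and strength
                double c_w, double t_w,   // counts for dish w
                double c, double T,       // totals over all dishes
                double p0) {              // base probability P0(w)
  // P(w) = (c_w - d*t_w + (alpha + d*T) * P0(w)) / (c + alpha)
  return (c_w - d * t_w + (alpha + d * T) * p0) / (c + alpha);
}
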
diff --git a/gi/pf/align-lexonly.cc b/gi/pf/align-lexonly.cc
index 8c1d689f..dbc9dc07 100644
--- a/gi/pf/align-lexonly.cc
+++ b/gi/pf/align-lexonly.cc
@@ -105,7 +105,7 @@ struct HierarchicalWordBase {
}
void Summary() const {
- cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (\\alpha=" << r.concentration() << ')' << endl;
+ cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (\\alpha=" << r.alpha() << ')' << endl;
for (CCRP_NoTable<vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it)
cerr << " " << it->second << '\t' << TD::GetString(it->first) << endl;
}
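
CCRP_NoTable tracks only per-dish customer counts, so its predictive rule is the d = 0 case of the Pitman-Yor sketch above; the parameter older code called concentration() is the same alpha. Again a hedged sketch with illustrative names:

// Sketch: with d = 0 the Pitman-Yor rule collapses to the
// Dirichlet-process CRP, which is all CCRP_NoTable needs:
// no table counts, a single hyperparameter.
double dp_prob(double alpha, double c_w, double c, double p0) {
  // P(w) = (c_w + alpha * P0(w)) / (c + alpha)
  return (c_w + alpha * p0) / (c + alpha);
}
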
diff --git a/gi/pf/brat.cc b/gi/pf/brat.cc
index 7b60ef23..c2c52760 100644
--- a/gi/pf/brat.cc
+++ b/gi/pf/brat.cc
@@ -191,7 +191,7 @@ struct UniphraseLM {
void ResampleHyperparameters(MT19937* rng) {
phrases_.resample_hyperparameters(rng);
gen_.resample_hyperparameters(rng);
- cerr << " " << phrases_.concentration();
+ cerr << " " << phrases_.alpha();
}
CCRP_NoTable<vector<int> > phrases_;
diff --git a/gi/pf/conditional_pseg.h b/gi/pf/conditional_pseg.h
index 2e9e38fc..f9841cbf 100644
--- a/gi/pf/conditional_pseg.h
+++ b/gi/pf/conditional_pseg.h
@@ -22,7 +22,7 @@ struct MConditionalTranslationModel {
void Summary() const {
std::cerr << "Number of conditioning contexts: " << r.size() << std::endl;
for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.d() << ",\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl;
+ std::cerr << TD::GetString(it->first) << " \t(d=" << it->second.discount() << ",\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl;
for (MFCR<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
std::cerr << " " << -1 << '\t' << i2->first << std::endl;
}
@@ -95,7 +95,7 @@ struct ConditionalTranslationModel {
void Summary() const {
std::cerr << "Number of conditioning contexts: " << r.size() << std::endl;
for (RuleModelHash::const_iterator it = r.begin(); it != r.end(); ++it) {
- std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.concentration() << ") --------------------------" << std::endl;
+ std::cerr << TD::GetString(it->first) << " \t(\\alpha = " << it->second.alpha() << ") --------------------------" << std::endl;
for (CCRP_NoTable<TRule>::const_iterator i2 = it->second.begin(); i2 != it->second.end(); ++i2)
std::cerr << " " << i2->second << '\t' << i2->first << std::endl;
}
diff --git a/gi/pf/learn_cfg.cc b/gi/pf/learn_cfg.cc
index b2ca029a..5b748311 100644
--- a/gi/pf/learn_cfg.cc
+++ b/gi/pf/learn_cfg.cc
@@ -183,9 +183,9 @@ struct HieroLMModel {
nts[i].resample_hyperparameters(rng);
if (kHIERARCHICAL_PRIOR) {
q0.resample_hyperparameters(rng);
- cerr << "[base d=" << q0.discount() << ", alpha=" << q0.discount() << "]";
+ cerr << "[base d=" << q0.discount() << ", alpha=" << q0.alpha() << "]";
}
- cerr << " d=" << nts[0].discount() << ", alpha=" << nts[0].concentration() << endl;
+ cerr << " d=" << nts[0].discount() << ", alpha=" << nts[0].alpha() << endl;
}
const BaseRuleModel base;
diff --git a/gi/pf/pfbrat.cc b/gi/pf/pfbrat.cc
index 7b60ef23..c2c52760 100644
--- a/gi/pf/pfbrat.cc
+++ b/gi/pf/pfbrat.cc
@@ -191,7 +191,7 @@ struct UniphraseLM {
void ResampleHyperparameters(MT19937* rng) {
phrases_.resample_hyperparameters(rng);
gen_.resample_hyperparameters(rng);
- cerr << " " << phrases_.concentration();
+ cerr << " " << phrases_.alpha();
}
CCRP_NoTable<vector<int> > phrases_;
diff --git a/gi/pf/pyp_lm.cc b/gi/pf/pyp_lm.cc
index 2837e33c..0d85536c 100644
--- a/gi/pf/pyp_lm.cc
+++ b/gi/pf/pyp_lm.cc
@@ -50,16 +50,19 @@ template <unsigned N> struct PYPLM;
// uniform base distribution
template<> struct PYPLM<0> {
- PYPLM(unsigned vs) : p0(1.0 / vs) {}
- void increment(WordID w, const vector<WordID>& context, MT19937* rng) const {}
- void decrement(WordID w, const vector<WordID>& context, MT19937* rng) const {}
+ PYPLM(unsigned vs) : p0(1.0 / vs), draws() {}
+ void increment(WordID w, const vector<WordID>& context, MT19937* rng) { ++draws; }
+ void decrement(WordID w, const vector<WordID>& context, MT19937* rng) { --draws; assert(draws >= 0); }
double prob(WordID w, const vector<WordID>& context) const { return p0; }
+ void resample_hyperparameters(MT19937* rng, const unsigned nloop, const unsigned niterations) {}
+ double log_likelihood() const { return draws * log(p0); }
const double p0;
+ int draws;
};
// represents an N-gram LM
template <unsigned N> struct PYPLM {
- PYPLM(unsigned vs) : backoff(vs) {}
+ PYPLM(unsigned vs) : backoff(vs), d(0.8), alpha(1.0) {}
void increment(WordID w, const vector<WordID>& context, MT19937* rng) {
const double bo = backoff.prob(w, context);
static vector<WordID> lookup(N-1);
@@ -67,7 +70,7 @@ template <unsigned N> struct PYPLM {
lookup[i] = context[context.size() - 1 - i];
typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it = p.find(lookup);
if (it == p.end())
- it = p.insert(make_pair(lookup, CCRP<WordID>(1,1,1,1))).first;
+ it = p.insert(make_pair(lookup, CCRP<WordID>(d,alpha))).first;
if (it->second.increment(w, bo, rng))
backoff.increment(w, context, rng);
}
@@ -89,7 +92,58 @@ template <unsigned N> struct PYPLM {
if (it == p.end()) return bo;
return it->second.prob(w, bo);
}
+
+ double log_likelihood() const {
+ return log_likelihood(d, alpha) + backoff.log_likelihood();
+ }
+
+ double log_likelihood(const double& dd, const double& aa) const {
+ if (aa <= -dd) return -std::numeric_limits<double>::infinity();
+ double llh = Md::log_beta_density(dd, 1, 1) + Md::log_gamma_density(aa, 1, 1);
+ typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::const_iterator it;
+ for (it = p.begin(); it != p.end(); ++it)
+ llh += it->second.log_crp_prob(dd, aa);
+ return llh;
+ }
+
+ struct DiscountResampler {
+ DiscountResampler(const PYPLM& m) : m_(m) {}
+ const PYPLM& m_;
+ double operator()(const double& proposed_discount) const {
+ return m_.log_likelihood(proposed_discount, m_.alpha);
+ }
+ };
+
+ struct AlphaResampler {
+ AlphaResampler(const PYPLM& m) : m_(m) {}
+ const PYPLM& m_;
+ double operator()(const double& proposed_alpha) const {
+ return m_.log_likelihood(m_.d, proposed_alpha);
+ }
+ };
+
+ void resample_hyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+ DiscountResampler dr(*this);
+ AlphaResampler ar(*this);
+ for (int iter = 0; iter < nloop; ++iter) {
+ alpha = slice_sampler1d(ar, alpha, *rng, 0.0,
+ std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+ d = slice_sampler1d(dr, d, *rng, std::numeric_limits<double>::min(),
+ 1.0, 0.0, niterations, 100*niterations);
+ }
+ alpha = slice_sampler1d(ar, alpha, *rng, 0.0,
+ std::numeric_limits<double>::infinity(), 0.0, niterations, 100*niterations);
+ typename unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > >::iterator it;
+ cerr << "PYPLM<" << N << ">(d=" << d << ",a=" << alpha << ") = " << log_likelihood(d, alpha) << endl;
+ for (it = p.begin(); it != p.end(); ++it) {
+ it->second.set_discount(d);
+ it->second.set_alpha(alpha);
+ }
+ backoff.resample_hyperparameters(rng, nloop, niterations);
+ }
+
PYPLM<N-1> backoff;
+ double d, alpha;
unordered_map<vector<WordID>, CCRP<WordID>, boost::hash<vector<WordID> > > p;
};
@@ -109,7 +163,7 @@ int main(int argc, char** argv) {
cerr << "Reading corpus...\n";
CorpusTools::ReadFromFile(conf["input"].as<string>(), &corpuse, &vocabe);
cerr << "E-corpus size: " << corpuse.size() << " sentences\t (" << vocabe.size() << " word types)\n";
-#define kORDER 5
+#define kORDER 3
PYPLM<kORDER> lm(vocabe.size());
vector<WordID> ctx(kORDER - 1, TD::Convert("<s>"));
int mci = corpuse.size() * 99 / 100;
@@ -126,6 +180,10 @@ int main(int argc, char** argv) {
if (SS > 0) lm.decrement(kEOS, ctx, &rng);
lm.increment(kEOS, ctx, &rng);
}
+ if (SS % 10 == 9) {
+ cerr << " [LLH=" << lm.log_likelihood() << "]" << endl;
+ if (SS % 20 == 19) lm.resample_hyperparameters(&rng);
+ } else { cerr << '.' << flush; }
}
double llh = 0;
unsigned cnt = 0;
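
The resample_hyperparameters added to pyp_lm.cc alternates univariate slice sampling on alpha over (0, infinity) and on d over (0, 1), scoring each proposal with log_likelihood, which combines a Beta(1,1) prior on d, a Gamma(1,1) prior on alpha, and the CRP terms of every context. Below is a self-contained sketch of the same one-dimensional slice-sampling idea (stepping out, then shrinkage, after Neal 2003); it is a generic illustration, not the slice_sampler1d implementation the code above calls:

// Generic 1-D slice sampler (stepping-out + shrinkage), in the spirit of
// Neal (2003). An illustrative stand-in for slice_sampler1d, not the
// implementation used in the diff above.
#include <algorithm>
#include <cmath>
#include <functional>
#include <random>

double slice_sample1d(const std::function<double(double)>& log_f,
                      double x0, std::mt19937& rng,
                      double lower, double upper,
                      double w = 0.1, unsigned max_steps = 100) {
  std::uniform_real_distribution<double> unif(0.0, 1.0);
  // Slice level: log y = log f(x0) + log U, with U ~ Uniform(0,1).
  const double log_y = log_f(x0) + std::log(unif(rng));
  // Step out an interval [l, r] around x0 until both ends fall off the slice.
  double l = std::max(lower, x0 - w * unif(rng));
  double r = std::min(upper, l + w);
  unsigned steps = max_steps;
  while (steps-- && l > lower && log_f(l) > log_y) l = std::max(lower, l - w);
  steps = max_steps;
  while (steps-- && r < upper && log_f(r) > log_y) r = std::min(upper, r + w);
  // Shrinkage: sample uniformly in [l, r]; on rejection shrink toward x0.
  for (;;) {
    const double x1 = l + (r - l) * unif(rng);
    if (log_f(x1) > log_y) return x1;
    if (x1 < x0) l = x1; else r = x1;
  }
}

Applied to the model above, log_f plays the role of the AlphaResampler or DiscountResampler functor, and each of the nloop sweeps resamples one hyperparameter while the other is held fixed.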