author     Chris Dyer <cdyer@cs.cmu.edu>   2012-03-10 12:56:15 -0500
committer  Chris Dyer <cdyer@cs.cmu.edu>   2012-03-10 12:56:15 -0500
commit     f06c3f8d9dc2ce66153890809a7fc9b296ee625e (patch)
tree       60ae6cb8714559ea1aa07e097af618281eb0310e /gi/pf
parent     1c92df11360cda4be57183bfb4efa2d62107c651 (diff)
ready to infer alignment parameters
Diffstat (limited to 'gi/pf')
 -rw-r--r--  gi/pf/Makefile.am            |   4
 -rw-r--r--  gi/pf/align-lexonly-pyp.cc   |  22
 -rw-r--r--  gi/pf/align-lexonly.cc       | 332
 -rw-r--r--  gi/pf/pyp_tm.cc              |   6
 -rw-r--r--  gi/pf/quasi_model2.h         | 115
 -rw-r--r--  gi/pf/tied_resampler.h       |  31
6 files changed, 143 insertions(+), 367 deletions(-)
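
This commit retires the old align-lexonly sampler and replaces the fixed QuasiModel2 alignment prior in align-lexonly-pyp with a count-tracking model whose likelihood can be re-evaluated under candidate hyperparameters; it also adds a BinTiedResampler for tying PYP hyperparameters across groups of CRPs. A minimal sketch of the new QuasiModel2 interface as it is used below (toy arguments, not part of the patch):

    #include "quasi_model2.h"

    QuasiModel2 paj(4.0, 0.08);        // (alpha, p(NULL)), as in the Aligner ctor
    paj.Increment(2, 1, 5, 6);         // observe a_j=2 at j=1, src_len=5, trg_len=6
    prob_t p   = paj.Prob(2, 1, 5, 6); // normalized Model 2-style probability
    prob_t llh = paj.Likelihood();     // product over all stored observation counts
    paj.Decrement(2, 1, 5, 6);         // remove the observation before resampling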
diff --git a/gi/pf/Makefile.am b/gi/pf/Makefile.am
index 4ce72ba1..f9c979d0 100644
--- a/gi/pf/Makefile.am
+++ b/gi/pf/Makefile.am
@@ -1,4 +1,4 @@
-bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl
+bin_PROGRAMS = cbgi brat dpnaive pfbrat pfdist itg pfnaive condnaive align-lexonly-pyp learn_cfg pyp_lm nuisance_test align-tl
noinst_LIBRARIES = libpf.a
@@ -7,8 +7,6 @@ libpf_a_SOURCES = base_distributions.cc reachability.cc cfg_wfst_composer.cc cor
nuisance_test_SOURCES = nuisance_test.cc
nuisance_test_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
-align_lexonly_SOURCES = align-lexonly.cc
-
align_lexonly_pyp_SOURCES = align-lexonly-pyp.cc
align_lexonly_pyp_LDADD = libpf.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a $(top_srcdir)/klm/lm/libklm.a $(top_srcdir)/klm/util/libklm_util.a -lz
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index 0c90b6ce..68cb9192 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -61,15 +61,15 @@ struct AlignedSentencePair {
struct Aligner {
Aligner(const vector<vector<WordID> >& lets, int num_letters, vector<AlignedSentencePair>* c) :
corpus(*c),
+ paj_model(4, 0.08),
model(lets, num_letters),
- paj(4, 0.08),
kNULL(TD::Convert("NULL")) {
assert(lets[kNULL].size() == 0);
}
vector<AlignedSentencePair>& corpus;
+ QuasiModel2 paj_model;
PYPLexicalTranslation model;
- const QuasiModel2 paj;
const WordID kNULL;
void ResampleHyperparameters() {
@@ -86,10 +86,12 @@ struct Aligner {
a_j = prng->next() * (1 + asp.src.size());
const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
model.Increment(f_a_j, asp.trg[j], &*prng);
- // TODO factor in alignment prob
+ paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size());
}
}
- cerr << "Corpus intialized randomly. LLH = " << model.Likelihood() << endl;
+ cerr << "Corpus intialized randomly." << endl;
+ cerr << "LLH = " << Likelihood() << " \t(Amodel=" << paj_model.Likelihood()
+ << " TModel=" << model.Likelihood() << ") contexts=" << model.UniqueConditioningContexts() << endl;
}
void ResampleCorpus() {
@@ -101,19 +103,25 @@ struct Aligner {
const WordID e_j = asp.trg[j];
WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
model.Decrement(f_a_j, e_j, prng);
+ paj_model.Decrement(a_j, j, asp.src.size(), asp.trg.size());
for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) {
const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL);
ss[prop_a_j] = model.Prob(prop_f, e_j);
- // TODO configurable
- ss[prop_a_j] *= paj.Pa_j(prop_a_j, j, asp.src.size(), asp.trg.size());
+ ss[prop_a_j] *= paj_model.Prob(prop_a_j, j, asp.src.size(), asp.trg.size());
}
a_j = prng->SelectSample(ss);
f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
model.Increment(f_a_j, e_j, prng);
+ paj_model.Increment(a_j, j, asp.src.size(), asp.trg.size());
}
}
- cerr << "LLH = " << model.Likelihood() << " " << model.UniqueConditioningContexts() << endl;
+ cerr << "LLH = " << Likelihood() << " \t(Amodel=" << paj_model.Likelihood()
+ << " TModel=" << model.Likelihood() << ") contexts=" << model.UniqueConditioningContexts() << endl;
+ }
+
+ prob_t Likelihood() const {
+ return model.Likelihood() * paj_model.Likelihood();
}
};
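
For reference, the factorization the new Likelihood() encodes, restated with the names from the code above:

    // Per target position j (src length n, trg length m), the Gibbs weight is
    //   p(a_j = i, e_j) = paj_model.Prob(i, j, n, m) * model.Prob(f_i, e_j)
    // with f_0 = NULL, so the corpus likelihood factors into the product
    //   Likelihood() = model.Likelihood() * paj_model.Likelihood()
    // reported in the (Amodel=..., TModel=...) log line.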
diff --git a/gi/pf/align-lexonly.cc b/gi/pf/align-lexonly.cc
deleted file mode 100644
index dbc9dc07..00000000
--- a/gi/pf/align-lexonly.cc
+++ /dev/null
@@ -1,332 +0,0 @@
-#include <iostream>
-#include <tr1/memory>
-#include <queue>
-
-#include <boost/multi_array.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "array2d.h"
-#include "base_distributions.h"
-#include "monotonic_pseg.h"
-#include "conditional_pseg.h"
-#include "trule.h"
-#include "tdict.h"
-#include "stringlib.h"
-#include "filelib.h"
-#include "dict.h"
-#include "sampler.h"
-#include "ccrp_nt.h"
-#include "corpus.h"
-#include "ngram_base.h"
-
-using namespace std;
-using namespace tr1;
-namespace po = boost::program_options;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
- po::options_description opts("Configuration options");
- opts.add_options()
- ("samples,s",po::value<unsigned>()->default_value(1000),"Number of samples")
- ("input,i",po::value<string>(),"Read parallel data from")
- ("random_seed,S",po::value<uint32_t>(), "Random seed");
- po::options_description clo("Command line options");
- clo.add_options()
- ("config", po::value<string>(), "Configuration file")
- ("help,h", "Print this help message and exit");
- po::options_description dconfig_options, dcmdline_options;
- dconfig_options.add(opts);
- dcmdline_options.add(opts).add(clo);
-
- po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
- if (conf->count("config")) {
- ifstream config((*conf)["config"].as<string>().c_str());
- po::store(po::parse_config_file(config, dconfig_options), *conf);
- }
- po::notify(*conf);
-
- if (conf->count("help") || (conf->count("input") == 0)) {
- cerr << dcmdline_options << endl;
- exit(1);
- }
-}
-
-shared_ptr<MT19937> prng;
-
-struct LexicalAlignment {
- unsigned char src_index;
- bool is_transliteration;
- vector<pair<short, short> > derivation;
-};
-
-struct AlignedSentencePair {
- vector<WordID> src;
- vector<WordID> trg;
- vector<LexicalAlignment> a;
- Array2D<short> posterior;
-};
-
-struct HierarchicalWordBase {
- explicit HierarchicalWordBase(const unsigned vocab_e_size) :
- base(prob_t::One()), r(25,25,10), u0(-log(vocab_e_size)) {}
-
- void ResampleHyperparameters(MT19937* rng) {
- r.resample_hyperparameters(rng);
- }
-
- inline double logp0(const vector<WordID>& s) const {
- return s.size() * u0;
- }
-
- // return p0 of rule.e_
- prob_t operator()(const TRule& rule) const {
- prob_t p; p.logeq(r.logprob(rule.e_, logp0(rule.e_)));
- return p;
- }
-
- void Increment(const TRule& rule) {
- if (r.increment(rule.e_)) {
- prob_t p; p.logeq(logp0(rule.e_));
- base *= p;
- }
- }
-
- void Decrement(const TRule& rule) {
- if (r.decrement(rule.e_)) {
- prob_t p; p.logeq(logp0(rule.e_));
- base /= p;
- }
- }
-
- prob_t Likelihood() const {
- prob_t p; p.logeq(r.log_crp_prob());
- p *= base;
- return p;
- }
-
- void Summary() const {
- cerr << "NUMBER OF CUSTOMERS: " << r.num_customers() << " (\\alpha=" << r.alpha() << ')' << endl;
- for (CCRP_NoTable<vector<WordID> >::const_iterator it = r.begin(); it != r.end(); ++it)
- cerr << " " << it->second << '\t' << TD::GetString(it->first) << endl;
- }
-
- prob_t base;
- CCRP_NoTable<vector<WordID> > r;
- const double u0;
-};
-
-struct BasicLexicalAlignment {
- explicit BasicLexicalAlignment(const vector<vector<WordID> >& lets,
- const unsigned words_e,
- const unsigned letters_e,
- vector<AlignedSentencePair>* corp) :
- letters(lets),
- corpus(*corp),
- up0("fr-en.10k.translit-base.txt.gz"),
- //up0(words_e),
- //up0("en.chars.1gram", letters_e),
- //up0("en.words.1gram"),
- //up0(letters_e),
- //up0("en.chars.2gram"),
- tmodel(up0) {
- }
-
- void InstantiateRule(const WordID src,
- const WordID trg,
- TRule* rule) const {
- static const WordID kX = TD::Convert("X") * -1;
- rule->lhs_ = kX;
- rule->e_ = letters[trg];
- rule->f_ = letters[src];
- }
-
- void InitializeRandom() {
- const WordID kNULL = TD::Convert("NULL");
- cerr << "Initializing with random alignments ...\n";
- for (unsigned i = 0; i < corpus.size(); ++i) {
- AlignedSentencePair& asp = corpus[i];
- asp.a.resize(asp.trg.size());
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- const unsigned char a_j = prng->next() * (1 + asp.src.size());
- const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- TRule r;
- InstantiateRule(f_a_j, asp.trg[j], &r);
- asp.a[j].is_transliteration = false;
- asp.a[j].src_index = a_j;
- if (tmodel.IncrementRule(r))
- up0.Increment(r);
- }
- }
- cerr << " LLH = " << Likelihood() << endl;
- }
-
- prob_t Likelihood() const {
- prob_t p = tmodel.Likelihood();
- p *= up0.Likelihood();
- return p;
- }
-
- void ResampleHyperparemeters() {
- cerr << " LLH_prev = " << Likelihood() << flush;
- tmodel.ResampleHyperparameters(&*prng);
- up0.ResampleHyperparameters(&*prng);
- cerr << "\tLLH_post = " << Likelihood() << endl;
- }
-
- void ResampleCorpus();
-
- const vector<vector<WordID> >& letters; // spelling dictionary
- vector<AlignedSentencePair>& corpus;
- //PhraseConditionalUninformativeBase up0;
- //PhraseConditionalUninformativeUnigramBase up0;
- //UnigramWordBase up0;
- //HierarchicalUnigramBase up0;
- TableLookupBase up0;
- //HierarchicalWordBase up0;
- //PoissonUniformUninformativeBase up0;
- //CompletelyUniformBase up0;
- //FixedNgramBase up0;
- //ConditionalTranslationModel<PhraseConditionalUninformativeBase> tmodel;
- //ConditionalTranslationModel<PhraseConditionalUninformativeUnigramBase> tmodel;
- //ConditionalTranslationModel<UnigramWordBase> tmodel;
- //ConditionalTranslationModel<HierarchicalUnigramBase> tmodel;
- //ConditionalTranslationModel<HierarchicalWordBase> tmodel;
- //ConditionalTranslationModel<PoissonUniformUninformativeBase> tmodel;
- ConditionalTranslationModel<TableLookupBase> tmodel;
- //ConditionalTranslationModel<FixedNgramBase> tmodel;
- //ConditionalTranslationModel<CompletelyUniformBase> tmodel;
-};
-
-void BasicLexicalAlignment::ResampleCorpus() {
- static const WordID kNULL = TD::Convert("NULL");
- for (unsigned i = 0; i < corpus.size(); ++i) {
- AlignedSentencePair& asp = corpus[i];
- SampleSet<prob_t> ss; ss.resize(asp.src.size() + 1);
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- TRule r;
- unsigned char& a_j = asp.a[j].src_index;
- WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- InstantiateRule(f_a_j, asp.trg[j], &r);
- if (tmodel.DecrementRule(r))
- up0.Decrement(r);
-
- for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) {
- const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL);
- InstantiateRule(prop_f, asp.trg[j], &r);
- ss[prop_a_j] = tmodel.RuleProbability(r);
- }
- a_j = prng->SelectSample(ss);
- f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
- InstantiateRule(f_a_j, asp.trg[j], &r);
- if (tmodel.IncrementRule(r))
- up0.Increment(r);
- }
- }
- cerr << " LLH = " << tmodel.Likelihood() << endl;
-}
-
-void ExtractLetters(const set<WordID>& v, vector<vector<WordID> >* l, set<WordID>* letset = NULL) {
- for (set<WordID>::const_iterator it = v.begin(); it != v.end(); ++it) {
- if (*it >= l->size()) { l->resize(*it + 1); }
- vector<WordID>& letters = (*l)[*it];
- if (letters.size()) continue; // if e and f have the same word
-
- const string& w = TD::Convert(*it);
-
- size_t cur = 0;
- while (cur < w.size()) {
- const size_t len = UTF8Len(w[cur]);
- letters.push_back(TD::Convert(w.substr(cur, len)));
- if (letset) letset->insert(letters.back());
- cur += len;
- }
- }
-}
-
-void Debug(const AlignedSentencePair& asp) {
- cerr << TD::GetString(asp.src) << endl << TD::GetString(asp.trg) << endl;
- Array2D<bool> a(asp.src.size(), asp.trg.size());
- for (unsigned j = 0; j < asp.trg.size(); ++j)
- if (asp.a[j].src_index) a(asp.a[j].src_index - 1, j) = true;
- cerr << a << endl;
-}
-
-void AddSample(AlignedSentencePair* asp) {
- for (unsigned j = 0; j < asp->trg.size(); ++j)
- asp->posterior(asp->a[j].src_index, j)++;
-}
-
-void WriteAlignments(const AlignedSentencePair& asp) {
- bool first = true;
- for (unsigned j = 0; j < asp.trg.size(); ++j) {
- int src_index = -1;
- int mc = -1;
- for (unsigned i = 0; i <= asp.src.size(); ++i) {
- if (asp.posterior(i, j) > mc) {
- mc = asp.posterior(i, j);
- src_index = i;
- }
- }
-
- if (src_index) {
- if (first) first = false; else cout << ' ';
- cout << (src_index - 1) << '-' << j;
- }
- }
- cout << endl;
-}
-
-int main(int argc, char** argv) {
- po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
-
- if (conf.count("random_seed"))
- prng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
- else
- prng.reset(new MT19937);
-// MT19937& rng = *prng;
-
- vector<vector<int> > corpuse, corpusf;
- set<int> vocabe, vocabf;
- corpus::ReadParallelCorpus(conf["input"].as<string>(), &corpusf, &corpuse, &vocabf, &vocabe);
- cerr << "f-Corpus size: " << corpusf.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabf.size() << " types\n";
- cerr << "f-Corpus size: " << corpuse.size() << " sentences\n";
- cerr << "f-Vocabulary size: " << vocabe.size() << " types\n";
- assert(corpusf.size() == corpuse.size());
-
- vector<AlignedSentencePair> corpus(corpuse.size());
- for (unsigned i = 0; i < corpuse.size(); ++i) {
- corpus[i].src.swap(corpusf[i]);
- corpus[i].trg.swap(corpuse[i]);
- corpus[i].posterior.resize(corpus[i].src.size() + 1, corpus[i].trg.size());
- }
- corpusf.clear(); corpuse.clear();
-
- vocabf.insert(TD::Convert("NULL"));
- vector<vector<WordID> > letters(TD::NumWords());
- set<WordID> letset;
- ExtractLetters(vocabe, &letters, &letset);
- ExtractLetters(vocabf, &letters, NULL);
- letters[TD::Convert("NULL")].clear();
-
- BasicLexicalAlignment x(letters, vocabe.size(), letset.size(), &corpus);
- x.InitializeRandom();
- const unsigned samples = conf["samples"].as<unsigned>();
- for (int i = 0; i < samples; ++i) {
- for (int j = 395; j < 397; ++j) Debug(corpus[j]);
- cerr << i << "\t" << x.tmodel.r.size() << "\t";
- if (i % 10 == 0) x.ResampleHyperparemeters();
- x.ResampleCorpus();
- if (i > (samples / 5) && (i % 10 == 9)) for (int j = 0; j < corpus.size(); ++j) AddSample(&corpus[j]);
- }
- for (unsigned i = 0; i < corpus.size(); ++i)
- WriteAlignments(corpus[i]);
- //ModelAndData posterior(x, &corpus, vocabe, vocabf);
- x.tmodel.Summary();
- x.up0.Summary();
-
- //posterior.Sample();
-
- return 0;
-}
diff --git a/gi/pf/pyp_tm.cc b/gi/pf/pyp_tm.cc
index 73104fe9..bf5a6497 100644
--- a/gi/pf/pyp_tm.cc
+++ b/gi/pf/pyp_tm.cc
@@ -10,7 +10,6 @@
#include "tdict.h"
#include "ccrp.h"
#include "pyp_word_model.h"
-
#include "tied_resampler.h"
using namespace std;
@@ -18,7 +17,7 @@ using namespace std::tr1;
template <typename Base>
struct ConditionalPYPWordModel {
- ConditionalPYPWordModel(Base* b) : base(*b) {}
+ ConditionalPYPWordModel(Base* b) : base(*b), btr(3) {}
void Summary() const {
cerr << "Number of conditioning contexts: " << r.size() << endl;
@@ -32,6 +31,7 @@ struct ConditionalPYPWordModel {
void ResampleHyperparameters(MT19937* rng) {
for (RuleModelHash::iterator it = r.begin(); it != r.end(); ++it)
it->second.resample_hyperparameters(rng);
+ btr.ResampleHyperparameters(rng);
}
prob_t Prob(const WordID src, const vector<WordID>& trglets) const {
@@ -72,7 +72,9 @@ struct ConditionalPYPWordModel {
return r.size();
}
+ // TODO tie PYP hyperparameters based on source word frequency bins
Base& base;
+ BinTiedResampler<CCRP<vector<WordID> > > btr;
typedef unordered_map<WordID, CCRP<vector<WordID> > > RuleModelHash;
RuleModelHash r;
};
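
One way the TODO above could be wired up; BinFor and the frequency lookup are assumptions for illustration, not part of this commit:

    // Hypothetical bin assignment by source-word frequency:
    unsigned BinFor(unsigned src_freq) {
      if (src_freq < 10)  return 0;   // rare source words
      if (src_freq < 100) return 1;   // medium frequency
      return 2;                       // frequent; btr was constructed with 3 bins
    }
    // When a CCRP is first created for source word src:
    //   btr.Add(BinFor(freq[src]), &r[src]);
    // btr.ResampleHyperparameters(rng) then ties (discount, strength)
    // within each frequency bin.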
diff --git a/gi/pf/quasi_model2.h b/gi/pf/quasi_model2.h
index 0095289f..8ec0a400 100644
--- a/gi/pf/quasi_model2.h
+++ b/gi/pf/quasi_model2.h
@@ -3,44 +3,113 @@
#include <vector>
#include <cmath>
+#include <tr1/unordered_map>
+#include "boost/functional.hpp"
#include "prob.h"
#include "array2d.h"
+struct AlignmentObservation {
+ AlignmentObservation() : src_len(), trg_len(), j(), a_j() {}
+ AlignmentObservation(unsigned sl, unsigned tl, unsigned tw, unsigned sw) :
+ src_len(sl), trg_len(tl), j(tw), a_j(sw) {}
+ unsigned short src_len;
+ unsigned short trg_len;
+ unsigned short j;
+ unsigned short a_j;
+};
+
+inline size_t hash_value(const AlignmentObservation& o) {
+ return reinterpret_cast<const size_t&>(o);
+}
+
+inline bool operator==(const AlignmentObservation& a, const AlignmentObservation& b) {
+ return hash_value(a) == hash_value(b);
+}
+
struct QuasiModel2 {
explicit QuasiModel2(double alpha, double pnull = 0.1) :
alpha_(alpha),
pnull_(pnull),
- pnotnull_(1 - pnull),
- z_(1000,1000) {}
+ pnotnull_(1 - pnull) {}
+
// a_j = 0 => NULL; src_len does *not* include null
- prob_t Pa_j(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const {
+ prob_t Prob(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) const {
if (!a_j) return pnull_;
- std::vector<prob_t>& zv = z_(src_len, trg_len);
- if (zv.size() == 0)
- zv.resize(trg_len);
-
- prob_t& z = zv[j];
- if (z.is_0()) z = ComputeZ(j, src_len, trg_len);
-
- prob_t p;
- p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha_);
- p *= pnotnull_;
- p /= z;
+ return pnotnull_ *
+ prob_t(UnnormalizedProb(a_j, j, src_len, trg_len, alpha_) / GetOrComputeZ(j, src_len, trg_len));
+ }
+
+ void Increment(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) {
+ assert(a_j <= src_len);
+ assert(j < trg_len);
+ ++obs_[AlignmentObservation(src_len, trg_len, j, a_j)];
+ }
+
+ void Decrement(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len) {
+ const AlignmentObservation ao(src_len, trg_len, j, a_j);
+ int &cc = obs_[ao];
+ assert(cc > 0);
+ --cc;
+ if (!cc) obs_.erase(ao);
+ }
+
+ prob_t Likelihood() const {
+ return Likelihood(alpha_, pnull_.as_float());
+ }
+
+ prob_t Likelihood(double alpha, double ppnull) const {
+ const prob_t pnull(ppnull);
+ const prob_t pnotnull(1 - ppnull);
+
+ prob_t p = prob_t::One();
+ for (ObsCount::const_iterator it = obs_.begin(); it != obs_.end(); ++it) {
+ const AlignmentObservation& ao = it->first;
+ if (ao.a_j) {
+ double u = UnnormalizedProb(ao.a_j, ao.j, ao.src_len, ao.trg_len, alpha);
+ double z = ComputeZ(ao.j, ao.src_len, ao.trg_len, alpha);
+ prob_t pa(u / z);
+ pa *= pnotnull;
+ pa.poweq(it->second);
+ p *= pa;
+ } else {
+ p *= pnull.pow(it->second);
+ }
+ }
return p;
}
+
private:
- prob_t ComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const {
- prob_t p, z = prob_t::Zero();
- for (int a_j = 1; a_j <= src_len; ++a_j) {
- p.logeq(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha_);
- z += p;
- }
+ static double UnnormalizedProb(unsigned a_j, unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
+ return exp(-fabs(double(a_j - 1) / src_len - double(j) / trg_len) * alpha);
+ }
+
+ static double ComputeZ(unsigned j, unsigned src_len, unsigned trg_len, double alpha) {
+ double z = 0;
+ for (int a_j = 1; a_j <= src_len; ++a_j)
+ z += UnnormalizedProb(a_j, j, src_len, trg_len, alpha);
return z;
}
+
+ const double& GetOrComputeZ(unsigned j, unsigned src_len, unsigned trg_len) const {
+ if (src_len >= zcache_.size())
+ zcache_.resize(src_len + 1);
+ if (trg_len >= zcache_[src_len].size())
+ zcache_[src_len].resize(trg_len + 1);
+ std::vector<double>& zv = zcache_[src_len][trg_len];
+ if (zv.size() == 0)
+ zv.resize(trg_len);
+ double& z = zv[j];
+ if (!z)
+ z = ComputeZ(j, src_len, trg_len, alpha_);
+ return z;
+ }
+
double alpha_;
- const prob_t pnull_;
- const prob_t pnotnull_;
- mutable Array2D<std::vector<prob_t> > z_;
+ prob_t pnull_;
+ prob_t pnotnull_;
+ mutable std::vector<std::vector<std::vector<double> > > zcache_;
+ typedef std::tr1::unordered_map<AlignmentObservation, int, boost::hash<AlignmentObservation> > ObsCount;
+ ObsCount obs_;
};
#endif
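
What "ready to infer alignment parameters" appears to set up: Likelihood(alpha, ppnull) can score candidate hyperparameters against the stored counts. A sketch of a slice-sampling hook, assuming log() on prob_t returns the log value (as elsewhere in cdec) and holding pnull fixed; this functor is not in the patch:

    struct QM2AlphaLLH {
      QM2AlphaLLH(const QuasiModel2& m, double pnull) : m_(m), pnull_(pnull) {}
      double operator()(const double& alpha) const {
        return log(m_.Likelihood(alpha, pnull_));  // log-likelihood under candidate alpha
      }
      const QuasiModel2& m_;
      const double pnull_;
    };
    // e.g. alpha_ = slice_sampler1d(QM2AlphaLLH(paj, 0.08), alpha_, rng, ...);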
diff --git a/gi/pf/tied_resampler.h b/gi/pf/tied_resampler.h
index 208fb9c7..5a262f9d 100644
--- a/gi/pf/tied_resampler.h
+++ b/gi/pf/tied_resampler.h
@@ -2,6 +2,7 @@
#define _TIED_RESAMPLER_H_
#include <set>
+#include <vector>
#include "sampler.h"
#include "slice_sampler.h"
#include "m.h"
@@ -28,6 +29,10 @@ struct TiedResampler {
crps.erase(crp);
}
+ size_t size() const {
+ return crps.size();
+ }
+
double LogLikelihood(double d, double s) const {
if (s <= -d) return -std::numeric_limits<double>::infinity();
double llh = Md::log_beta_density(d, d_alpha, d_beta) +
@@ -54,6 +59,7 @@ struct TiedResampler {
};
void ResampleHyperparameters(MT19937* rng, const unsigned nloop = 5, const unsigned niterations = 10) {
+ if (size() == 0) { std::cerr << "EMPTY - not resampling\n"; return; }
const DiscountResampler dr(*this);
const AlphaResampler ar(*this);
for (int iter = 0; iter < nloop; ++iter) {
@@ -79,4 +85,29 @@ struct TiedResampler {
double discount, strength;
};
+// split according to some criterion
+template <class CRP>
+struct BinTiedResampler {
+ explicit BinTiedResampler(unsigned nbins) :
+ resamplers(nbins, TiedResampler<CRP>(1,1,1,1)) {}
+
+ void Add(unsigned bin, CRP* crp) {
+ resamplers[bin].Add(crp);
+ }
+
+ void Remove(unsigned bin, CRP* crp) {
+ resamplers[bin].Remove(crp);
+ }
+
+ void ResampleHyperparameters(MT19937* rng) {
+ for (unsigned i = 0; i < resamplers.size(); ++i) {
+ std::cerr << "BIN " << i << " (" << resamplers[i].size() << " CRPs): " << std::flush;
+ resamplers[i].ResampleHyperparameters(rng);
+ }
+ }
+
+ private:
+ std::vector<TiedResampler<CRP> > resamplers;
+};
+
#endif
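
Minimal usage sketch for BinTiedResampler; the CCRP constructor arguments are an assumption (see ccrp.h for the real signature):

    #include <vector>
    #include "ccrp.h"
    #include "sampler.h"
    #include "tied_resampler.h"
    #include "wordid.h"

    MT19937 rng;
    BinTiedResampler<CCRP<std::vector<WordID> > > btr(2);  // two bins
    CCRP<std::vector<WordID> > crp(0.5, 1.0);  // (discount, strength) -- assumed ctor
    btr.Add(0, &crp);                   // register crp in bin 0
    btr.ResampleHyperparameters(&rng);  // per bin: slice-sample shared (d, strength)
    btr.Remove(0, &crp);                // unregister before crp is destroyed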