From e320dd47380f8f3a628073f926a56e4321146ebd Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Sat, 10 Mar 2012 00:00:27 -0500
Subject: use quasi model 2 instead of uniform alignments

---
 gi/pf/align-lexonly-pyp.cc | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'gi/pf/align-lexonly-pyp.cc')

diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index 4a1d1db6..0c90b6ce 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -11,6 +11,7 @@
 #include "sampler.h"
 #include "corpus.h"
 #include "pyp_tm.h"
+#include "quasi_model2.h"
 
 using namespace std;
 namespace po = boost::program_options;
@@ -61,12 +62,14 @@ struct Aligner {
   Aligner(const vector >& lets, int num_letters, vector* c) :
       corpus(*c),
       model(lets, num_letters),
+      paj(4, 0.08),
       kNULL(TD::Convert("NULL")) {
     assert(lets[kNULL].size() == 0);
   }
 
   vector& corpus;
   PYPLexicalTranslation model;
+  const QuasiModel2 paj;
   const WordID kNULL;
 
   void ResampleHyperparameters() {
@@ -83,6 +86,7 @@ struct Aligner {
         a_j = prng->next() * (1 + asp.src.size());
         const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
         model.Increment(f_a_j, asp.trg[j], &*prng);
+        // TODO factor in alignment prob
       }
     }
     cerr << "Corpus intialized randomly. LLH = " << model.Likelihood() << endl;
@@ -101,6 +105,8 @@ struct Aligner {
         for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) {
           const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL);
           ss[prop_a_j] = model.Prob(prop_f, e_j);
+          // TODO configurable
+          ss[prop_a_j] *= paj.Pa_j(prop_a_j, j, asp.src.size(), asp.trg.size());
         }
         a_j = prng->SelectSample(ss);
         f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
-- 
cgit v1.2.3