author     Chris Dyer <cdyer@cs.cmu.edu>  2012-03-10 00:00:27 -0500
committer  Chris Dyer <cdyer@cs.cmu.edu>  2012-03-10 00:00:27 -0500
commit     2e9006a5b153dfe3c0fcedf9f1eaea8866f518a8 (patch)
tree       1a0667c31a81efce468cc3c9d6551ed8fe953cb7 /gi/pf/align-lexonly-pyp.cc
parent     5f9f400f4359bc14f7231d6eabd76b7ceee737aa (diff)
use quasi model 2 instead of uniform alignments
Diffstat (limited to 'gi/pf/align-lexonly-pyp.cc')
-rw-r--r--  gi/pf/align-lexonly-pyp.cc  6
1 file changed, 6 insertions, 0 deletions
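The new dependency quasi_model2.h is not part of this diff, so the following is only a minimal sketch of the kind of distribution a "quasi Model 2" alignment prior usually encodes: rather than treating every source position as equally likely for target position j, it concentrates mass near the diagonal of the sentence pair and reserves a fixed amount for NULL. The class name QuasiModel2Sketch and the exponential functional form are illustrative assumptions, as is reading the two constructor arguments of paj(4, 0.08) as a diagonal-sharpness parameter and a NULL probability; only the Pa_j name and its argument order (a_j, j, src_len, trg_len) are taken from the calls visible in the diff below.

#include <cmath>

struct QuasiModel2Sketch {
  QuasiModel2Sketch(double alpha, double p_null) : alpha_(alpha), p_null_(p_null) {}

  // p(a_j | j, n, m): a_j in [0, n], with 0 meaning NULL; j in [0, m) is the
  // target position; n = source length, m = target length (matching the call
  // paj.Pa_j(prop_a_j, j, asp.src.size(), asp.trg.size()) in the diff).
  double Pa_j(unsigned a_j, unsigned j, unsigned n, unsigned m) const {
    if (a_j == 0) return p_null_;                 // fixed mass for the NULL word
    double z = 0;
    for (unsigned i = 1; i <= n; ++i) z += Unnormalized(i, j, n, m);
    return (1.0 - p_null_) * Unnormalized(a_j, j, n, m) / z;
  }

 private:
  // Penalize links far from the sentence-pair diagonal; alpha sharpens the peak.
  double Unnormalized(unsigned i, unsigned j, unsigned n, unsigned m) const {
    return std::exp(-alpha_ * std::fabs(double(i) / n - double(j + 1) / m));
  }

  double alpha_;
  double p_null_;
};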
diff --git a/gi/pf/align-lexonly-pyp.cc b/gi/pf/align-lexonly-pyp.cc
index 4a1d1db6..0c90b6ce 100644
--- a/gi/pf/align-lexonly-pyp.cc
+++ b/gi/pf/align-lexonly-pyp.cc
@@ -11,6 +11,7 @@
#include "sampler.h"
#include "corpus.h"
#include "pyp_tm.h"
+#include "quasi_model2.h"

using namespace std;
namespace po = boost::program_options;
@@ -61,12 +62,14 @@ struct Aligner {
Aligner(const vector<vector<WordID> >& lets, int num_letters, vector<AlignedSentencePair>* c) :
corpus(*c),
model(lets, num_letters),
+ paj(4, 0.08),
kNULL(TD::Convert("NULL")) {
assert(lets[kNULL].size() == 0);
}

vector<AlignedSentencePair>& corpus;
PYPLexicalTranslation model;
+ const QuasiModel2 paj;
const WordID kNULL;

void ResampleHyperparameters() {
@@ -83,6 +86,7 @@ struct Aligner {
a_j = prng->next() * (1 + asp.src.size());
const WordID f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
model.Increment(f_a_j, asp.trg[j], &*prng);
+ // TODO factor in alignment prob
}
}
cerr << "Corpus intialized randomly. LLH = " << model.Likelihood() << endl;
@@ -101,6 +105,8 @@ struct Aligner {
for (unsigned prop_a_j = 0; prop_a_j <= asp.src.size(); ++prop_a_j) {
const WordID prop_f = (prop_a_j ? asp.src[prop_a_j - 1] : kNULL);
ss[prop_a_j] = model.Prob(prop_f, e_j);
+ // TODO configurable
+ ss[prop_a_j] *= paj.Pa_j(prop_a_j, j, asp.src.size(), asp.trg.size());
}
a_j = prng->SelectSample(ss);
f_a_j = (a_j ? asp.src[a_j - 1] : kNULL);
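With the change in this last hunk, each candidate link is scored by the product of the lexical translation probability and the positional prior before one is drawn. The condensed, self-contained sketch below restates that proposal step under stated assumptions: ResampleLink, its template parameters, and the inline inverse-CDF draw are hypothetical stand-ins for PYPLexicalTranslation, QuasiModel2, and prng->SelectSample(ss) in the real file, and WordID is treated as a plain int.

#include <vector>

// Sketch of the updated Gibbs proposal for one alignment link a_j.
// TM must expose Prob(f, e); Prior must expose Pa_j(a_j, j, n, m);
// Uniform01 must return a double uniformly distributed in [0, 1).
template <typename TM, typename Prior, typename Uniform01>
unsigned ResampleLink(const TM& model, const Prior& paj, Uniform01& rand01,
                      const std::vector<int>& src, int e_j, int null_word,
                      unsigned j, unsigned trg_len) {
  // Unnormalized p(a_j = i) = p(e_j | f_i) * p(a_j = i | j, |f|, |e|);
  // i == 0 stands for the NULL word, i >= 1 for source word src[i - 1].
  std::vector<double> ss(src.size() + 1);
  double z = 0;
  for (unsigned i = 0; i <= src.size(); ++i) {
    const int f_i = (i ? src[i - 1] : null_word);
    ss[i] = model.Prob(f_i, e_j) * paj.Pa_j(i, j, src.size(), trg_len);
    z += ss[i];
  }
  // Inverse-CDF draw over the unnormalized weights; the real code delegates
  // this step to prng->SelectSample(ss).
  double r = rand01() * z;
  unsigned a_j = 0;
  while (a_j < src.size() && (r -= ss[a_j]) >= 0) ++a_j;
  return a_j;
}

Sampling from unnormalized weights is presumably why the real loop only fills ss with the raw products and leaves normalization to SelectSample.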