summaryrefslogtreecommitdiff
path: root/gi/pf/base_measures.cc
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2011-10-20 02:31:25 +0200
committerPatrick Simianer <p@simianer.de>2011-10-20 02:31:25 +0200
commita5a92ebe23c5819ed104313426012011e32539da (patch)
tree3416818c758d5ece4e71fe522c571e75ea04f100 /gi/pf/base_measures.cc
parentb88332caac2cbe737c99b8098813f868ca876d8b (diff)
parent78baccbb4231bb84a456702d4f574f8e601a8182 (diff)
finalized merge
Diffstat (limited to 'gi/pf/base_measures.cc')
-rw-r--r--gi/pf/base_measures.cc112
1 files changed, 112 insertions, 0 deletions
diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc
new file mode 100644
index 00000000..f8ddfd32
--- /dev/null
+++ b/gi/pf/base_measures.cc
@@ -0,0 +1,112 @@
+#include "base_measures.h"
+
+#include <iostream>
+
+#include "filelib.h"
+
+using namespace std;
+
+void Model1::LoadModel1(const string& fname) {
+ cerr << "Loading Model 1 parameters from " << fname << " ..." << endl;
+ ReadFile rf(fname);
+ istream& in = *rf.stream();
+ string line;
+ unsigned lc = 0;
+ while(getline(in, line)) {
+ ++lc;
+ int cur = 0;
+ int start = 0;
+ while(cur < line.size() && line[cur] != ' ') { ++cur; }
+ assert(cur != line.size());
+ line[cur] = 0;
+ const WordID src = TD::Convert(&line[0]);
+ ++cur;
+ start = cur;
+ while(cur < line.size() && line[cur] != ' ') { ++cur; }
+ assert(cur != line.size());
+ line[cur] = 0;
+ WordID trg = TD::Convert(&line[start]);
+ const double logprob = strtod(&line[cur + 1], NULL);
+ if (src >= ttable.size()) ttable.resize(src + 1);
+ ttable[src][trg].logeq(logprob);
+ }
+ cerr << " read " << lc << " parameters.\n";
+}
+
+prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc,
+ const vector<WordID>& vtrg,
+ int start_src, int start_trg) const {
+ const int flen = vsrc.size() - start_src;
+ const int elen = vtrg.size() - start_trg;
+ prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+ prob_t p;
+ p.logeq(log_poisson(elen, flen + 0.01)); // elen | flen ~Pois(flen + 0.01)
+ for (int i = 0; i < elen; ++i) { // for each position i in e-RHS
+ const WordID trg = vtrg[i + start_trg];
+ prob_t tp = prob_t::Zero();
+ for (int j = -1; j < flen; ++j) {
+ const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+ tp += kM1MIXTURE * model1(src, trg);
+ tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+ }
+ tp *= uniform_src_alignment; // draw a_i ~uniform
+ p *= tp; // draw e_i ~Model1(f_a_i) / uniform
+ }
+ if (p.is_0()) {
+ cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+ abort();
+ }
+ return p;
+}
+
+prob_t PhraseJointBase::p0(const vector<WordID>& vsrc,
+ const vector<WordID>& vtrg,
+ int start_src, int start_trg) const {
+ const int flen = vsrc.size() - start_src;
+ const int elen = vtrg.size() - start_trg;
+ prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1));
+ prob_t p;
+ p.logeq(log_poisson(flen, 1.0)); // flen ~Pois(1)
+ // elen | flen ~Pois(flen + 0.01)
+ prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01));
+ p *= ptrglen;
+ p *= kUNIFORM_SOURCE.pow(flen); // each f in F ~Uniform
+ for (int i = 0; i < elen; ++i) { // for each position i in E
+ const WordID trg = vtrg[i + start_trg];
+ prob_t tp = prob_t::Zero();
+ for (int j = -1; j < flen; ++j) {
+ const WordID src = j < 0 ? 0 : vsrc[j + start_src];
+ tp += kM1MIXTURE * model1(src, trg);
+ tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET;
+ }
+ tp *= uniform_src_alignment; // draw a_i ~uniform
+ p *= tp; // draw e_i ~Model1(f_a_i) / uniform
+ }
+ if (p.is_0()) {
+ cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl;
+ abort();
+ }
+ return p;
+}
+
+JumpBase::JumpBase() : p(200) {
+ for (unsigned src_len = 1; src_len < 200; ++src_len) {
+ map<int, prob_t>& cpd = p[src_len];
+ int min_jump = 1 - src_len;
+ int max_jump = src_len;
+ prob_t z;
+ for (int j = min_jump; j <= max_jump; ++j) {
+ prob_t& cp = cpd[j];
+ if (j < 0)
+ cp.logeq(log_poisson(1.5-j, 1));
+ else if (j > 0)
+ cp.logeq(log_poisson(j, 1));
+ cp.poweq(0.2);
+ z += cp;
+ }
+ for (int j = min_jump; j <= max_jump; ++j) {
+ cpd[j] /= z;
+ }
+ }
+}
+