diff options
| author | Chris Dyer <cdyer@cs.cmu.edu> | 2011-10-11 12:06:32 +0100 | 
|---|---|---|
| committer | Chris Dyer <cdyer@cs.cmu.edu> | 2011-10-11 12:06:32 +0100 | 
| commit | af159e4c7066ea9a96f077e7e9265c8571f02053 (patch) | |
| tree | 752d77d7dab832cf24118ef9f682eeb33097f0d1 /gi/pf/base_measures.cc | |
| parent | 52e09d888692be28174ddf21afbae004d84c0d89 (diff) | |
check in some experimental particle filtering code, some gitignore fixes
Diffstat (limited to 'gi/pf/base_measures.cc')
| -rw-r--r-- | gi/pf/base_measures.cc | 112 | 
1 files changed, 112 insertions, 0 deletions
| diff --git a/gi/pf/base_measures.cc b/gi/pf/base_measures.cc new file mode 100644 index 00000000..f8ddfd32 --- /dev/null +++ b/gi/pf/base_measures.cc @@ -0,0 +1,112 @@ +#include "base_measures.h" + +#include <iostream> + +#include "filelib.h" + +using namespace std; + +void Model1::LoadModel1(const string& fname) { +  cerr << "Loading Model 1 parameters from " << fname << " ..." << endl; +  ReadFile rf(fname); +  istream& in = *rf.stream(); +  string line; +  unsigned lc = 0; +  while(getline(in, line)) { +    ++lc; +    int cur = 0; +    int start = 0; +    while(cur < line.size() && line[cur] != ' ') { ++cur; } +    assert(cur != line.size()); +    line[cur] = 0; +    const WordID src = TD::Convert(&line[0]); +    ++cur; +    start = cur; +    while(cur < line.size() && line[cur] != ' ') { ++cur; } +    assert(cur != line.size()); +    line[cur] = 0; +    WordID trg = TD::Convert(&line[start]); +    const double logprob = strtod(&line[cur + 1], NULL); +    if (src >= ttable.size()) ttable.resize(src + 1); +    ttable[src][trg].logeq(logprob); +  } +  cerr << "  read " << lc << " parameters.\n"; +} + +prob_t PhraseConditionalBase::p0(const vector<WordID>& vsrc, +                                 const vector<WordID>& vtrg, +                                 int start_src, int start_trg) const { +  const int flen = vsrc.size() - start_src; +  const int elen = vtrg.size() - start_trg; +  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); +  prob_t p; +  p.logeq(log_poisson(elen, flen + 0.01));       // elen | flen          ~Pois(flen + 0.01) +  for (int i = 0; i < elen; ++i) {               // for each position i in e-RHS +    const WordID trg = vtrg[i + start_trg]; +    prob_t tp = prob_t::Zero(); +    for (int j = -1; j < flen; ++j) { +      const WordID src = j < 0 ? 0 : vsrc[j + start_src]; +      tp += kM1MIXTURE * model1(src, trg); +      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; +    } +    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform +    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform +  } +  if (p.is_0()) { +    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; +    abort(); +  } +  return p; +} + +prob_t PhraseJointBase::p0(const vector<WordID>& vsrc, +                           const vector<WordID>& vtrg, +                           int start_src, int start_trg) const { +  const int flen = vsrc.size() - start_src; +  const int elen = vtrg.size() - start_trg; +  prob_t uniform_src_alignment; uniform_src_alignment.logeq(-log(flen + 1)); +  prob_t p; +  p.logeq(log_poisson(flen, 1.0));               // flen                 ~Pois(1) +                                                 // elen | flen          ~Pois(flen + 0.01) +  prob_t ptrglen; ptrglen.logeq(log_poisson(elen, flen + 0.01)); +  p *= ptrglen; +  p *= kUNIFORM_SOURCE.pow(flen);                // each f in F ~Uniform +  for (int i = 0; i < elen; ++i) {               // for each position i in E +    const WordID trg = vtrg[i + start_trg]; +    prob_t tp = prob_t::Zero(); +    for (int j = -1; j < flen; ++j) { +      const WordID src = j < 0 ? 0 : vsrc[j + start_src]; +      tp += kM1MIXTURE * model1(src, trg); +      tp += kUNIFORM_MIXTURE * kUNIFORM_TARGET; +    } +    tp *= uniform_src_alignment;                 //     draw a_i         ~uniform +    p *= tp;                                     //     draw e_i         ~Model1(f_a_i) / uniform +  } +  if (p.is_0()) { +    cerr << "Zero! " << vsrc << "\nTRG=" << vtrg << endl; +    abort(); +  } +  return p; +} + +JumpBase::JumpBase() : p(200) { +  for (unsigned src_len = 1; src_len < 200; ++src_len) { +    map<int, prob_t>& cpd = p[src_len]; +    int min_jump = 1 - src_len; +    int max_jump = src_len; +    prob_t z; +    for (int j = min_jump; j <= max_jump; ++j) { +      prob_t& cp = cpd[j]; +      if (j < 0) +        cp.logeq(log_poisson(1.5-j, 1)); +      else if (j > 0) +        cp.logeq(log_poisson(j, 1)); +      cp.poweq(0.2); +      z += cp; +    } +    for (int j = min_jump; j <= max_jump; ++j) { +      cpd[j] /= z; +    } +  } +} + | 
