summaryrefslogtreecommitdiff
path: root/gi/pf/backward.cc
blob: b92629fd6718250f07e28c9de25696593e8ec017 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include "backward.h"

#include <queue>
#include <utility>

#include "array2d.h"
#include "reachability.h"
#include "base_distributions.h"

using namespace std;

BackwardEstimator::BackwardEstimator(const string& s2t,
                    const string& t2s) : m1(new Model1(s2t)), m1inv(new Model1(t2s)) {}

BackwardEstimator::~BackwardEstimator() {
  delete m1; m1 = NULL;
  delete m1inv; m1inv = NULL;
}

float BackwardEstimator::ComputeBackwardProb(const std::vector<WordID>& src,
                                             const std::vector<WordID>& trg,
                                             unsigned src_covered,
                                             unsigned trg_covered,
                                             double s2t_ratio) const {
  if (src_covered == src.size() || trg_covered == trg.size()) {
    assert(src_covered == src.size());
    assert(trg_covered == trg.size());
    return 0;
  }
  static const WordID kNULL = TD::Convert("<eps>");
  const prob_t uniform_alignment(1.0 / (src.size() - src_covered + 1));
  // TODO factor in expected length ratio
  prob_t e; e.logeq(Md::log_poisson(trg.size() - trg_covered, (src.size() - src_covered) * s2t_ratio)); // p(trg len remaining | src len remaining)
  for (unsigned j = trg_covered; j < trg.size(); ++j) {
    prob_t p = (*m1)(kNULL, trg[j]) + prob_t(1e-12);
    for (unsigned i = src_covered; i < src.size(); ++i)
      p += (*m1)(src[i], trg[j]);
    if (p.is_0()) {
      cerr << "ERROR: p(" << TD::Convert(trg[j]) << " | " << TD::GetString(src) << ") = 0!\n";
      assert(!"failed");
    }
    p *= uniform_alignment;
    e *= p;
  }
  // TODO factor in expected length ratio
  const prob_t inv_uniform(1.0 / (trg.size() - trg_covered + 1.0));
  prob_t inv;
  inv.logeq(Md::log_poisson(src.size() - src_covered, (trg.size() - trg_covered) / s2t_ratio));
  for (unsigned i = src_covered; i < src.size(); ++i) {
    prob_t p = (*m1inv)(kNULL, src[i]) + prob_t(1e-12);
    for (unsigned j = trg_covered; j < trg.size(); ++j)
      p += (*m1inv)(trg[j], src[i]);
    if (p.is_0()) {
      cerr << "ERROR: p_inv(" << TD::Convert(src[i]) << " | " << TD::GetString(trg) << ") = 0!\n";
      assert(!"failed");
    }
    p *= inv_uniform;
    inv *= p;
  }
  return (log(e) + log(inv)) / 2;
}

void BackwardEstimator::InitializeGrid(const vector<WordID>& src,
                      const vector<WordID>& trg,
                      const Reachability& r,
                      double s2t_ratio,
                      float* grid) const {
  queue<pair<int,int> > q;
  q.push(make_pair(0,0));
  Array2D<bool> done(src.size()+1, trg.size()+1, false);
  //cerr << TD::GetString(src) << " ||| " << TD::GetString(trg) << endl;
  while(!q.empty()) {
    const pair<int,int> n = q.front();
    q.pop();
    if (done(n.first,n.second)) continue;
    done(n.first,n.second) = true;

    float lp = ComputeBackwardProb(src, trg, n.first, n.second, s2t_ratio);
    if (n.first == 0 && n.second == 0) grid[0] = lp;
    //cerr << "  " << n.first << "," << n.second << "\t" << lp << endl;

    if (n.first == src.size() || n.second == trg.size()) continue;
    const vector<pair<short,short> >& edges = r.valid_deltas[n.first][n.second];
    for (int i = 0; i < edges.size(); ++i)
      q.push(make_pair(n.first + edges[i].first, n.second + edges[i].second));
  }
  //static int cc = 0; ++cc; if (cc == 80) exit(1);
}