1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
#include "reachability.h"
#include <vector>
#include <iostream>
using namespace std;
struct SState {
SState() : prev_src_covered(), prev_trg_covered() {}
SState(int i, int j) : prev_src_covered(i), prev_trg_covered(j) {}
int prev_src_covered;
int prev_trg_covered;
};
void Reachability::ComputeReachability(int srclen, int trglen, int src_max_phrase_len, int trg_max_phrase_len) {
typedef boost::multi_array<vector<SState>, 2> array_type;
array_type a(boost::extents[srclen + 1][trglen + 1]);
a[0][0].push_back(SState());
for (int i = 0; i < srclen; ++i) {
for (int j = 0; j < trglen; ++j) {
if (a[i][j].size() == 0) continue;
const SState prev(i,j);
for (int k = 1; k <= src_max_phrase_len; ++k) {
if ((i + k) > srclen) continue;
for (int l = 1; l <= trg_max_phrase_len; ++l) {
if ((j + l) > trglen) continue;
a[i + k][j + l].push_back(prev);
}
}
}
}
a[0][0].clear();
//cerr << srclen << "," << trglen << ": Final cell contains " << a[srclen][trglen].size() << " back pointers\n";
if (a[srclen][trglen].empty()) {
cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") violates reachability constraints\n";
nodes = 0;
return;
}
typedef boost::multi_array<bool, 2> rarray_type;
rarray_type r(boost::extents[srclen + 1][trglen + 1]);
r[srclen][trglen] = true;
nodes = 0;
for (int i = srclen; i >= 0; --i) {
for (int j = trglen; j >= 0; --j) {
vector<SState>& prevs = a[i][j];
if (!r[i][j]) { prevs.clear(); }
for (int k = 0; k < prevs.size(); ++k) {
r[prevs[k].prev_src_covered][prevs[k].prev_trg_covered] = true;
int src_delta = i - prevs[k].prev_src_covered;
edges[prevs[k].prev_src_covered][prevs[k].prev_trg_covered][src_delta][j - prevs[k].prev_trg_covered] = true;
valid_deltas[prevs[k].prev_src_covered][prevs[k].prev_trg_covered].push_back(make_pair<short,short>(src_delta,j - prevs[k].prev_trg_covered));
short &msd = max_src_delta[prevs[k].prev_src_covered][prevs[k].prev_trg_covered];
if (src_delta > msd) msd = src_delta;
}
}
}
assert(!edges[0][0][1][0]);
assert(!edges[0][0][0][1]);
assert(!edges[0][0][0][0]);
assert(max_src_delta[0][0] > 0);
nodes = 0;
for (int i = 0; i < srclen; ++i) {
for (int j = 0; j < trglen; ++j) {
if (valid_deltas[i][j].size() > 0) {
node_addresses[i][j] = nodes++;
} else {
node_addresses[i][j] = -1;
}
}
}
cerr << "Sequence pair with lengths (" << srclen << ',' << trglen << ") has " << valid_deltas[0][0].size() << " out edges in its root node, " << nodes << " nodes in total, and outside estimate matrix will require " << sizeof(float)*nodes << " bytes\n";
}
|