summaryrefslogtreecommitdiff
path: root/word-aligner/da.h
diff options
context:
space:
mode:
Diffstat (limited to 'word-aligner/da.h')
-rw-r--r--word-aligner/da.h75
1 files changed, 75 insertions, 0 deletions
diff --git a/word-aligner/da.h b/word-aligner/da.h
new file mode 100644
index 00000000..add64a5b
--- /dev/null
+++ b/word-aligner/da.h
@@ -0,0 +1,75 @@
+#ifndef _DA_H_
+#define _DA_H_
+
+#include <cmath>
+#include <cassert>
+
+// m = trg len
+// n = src len
+// i = trg index
+// j = src index
+struct DiagonalAlignment {
+
+ static double UnnormalizedProb(const unsigned i, const unsigned j, const unsigned m, const unsigned n, const double alpha) {
+ assert(i > 0);
+ assert(n > 0);
+ assert(m >= i);
+ assert(n >= j);
+ return exp(feat(i, j, m, n) * alpha);
+ }
+
+ static double ComputeZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) {
+ assert(i > 0);
+ assert(n > 0);
+ assert(m >= i);
+ const double split = double(i) * n / m;
+ const unsigned floor = split;
+ unsigned ceil = floor + 1;
+ const double ratio = exp(-alpha / n);
+ const unsigned num_top = n - floor;
+ double ezt = 0;
+ double ezb = 0;
+ if (num_top)
+ ezt = UnnormalizedProb(i, ceil, m, n, alpha) * (1.0 - pow(ratio, num_top)) / (1.0 - ratio);
+ if (floor)
+ ezb = UnnormalizedProb(i, floor, m, n, alpha) * (1.0 - pow(ratio, floor)) / (1.0 - ratio);
+ return ezb + ezt;
+ }
+
+ static double ComputeDLogZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) {
+ const double z = ComputeZ(i, n, m, alpha);
+ const double split = double(i) * n / m;
+ const unsigned floor = split;
+ const unsigned ceil = floor + 1;
+ const double ratio = exp(-alpha / n);
+ const double d = -1.0 / n;
+ const unsigned num_top = n - floor;
+ double pct = 0;
+ double pcb = 0;
+ if (num_top) {
+ pct = arithmetico_geometric_series(feat(i, ceil, m, n), UnnormalizedProb(i, ceil, m, n, alpha), ratio, d, num_top);
+ //cerr << "PCT = " << pct << endl;
+ }
+ if (floor) {
+ pcb = arithmetico_geometric_series(feat(i, floor, m, n), UnnormalizedProb(i, floor, m, n, alpha), ratio, d, floor);
+ //cerr << "PCB = " << pcb << endl;
+ }
+ return (pct + pcb) / z;
+ }
+
+ private:
+ inline static double feat(const unsigned i, const unsigned j, const unsigned m, const unsigned n) {
+ return -fabs(double(j) / n - double(i) / m);
+ }
+
+ inline static double arithmetico_geometric_series(const double a_1, const double g_1, const double r, const double d, const unsigned n) {
+ const double g_np1 = g_1 * pow(r, n);
+ const double a_n = d * (n - 1) + a_1;
+ const double x_1 = a_1 * g_1;
+ const double g_2 = g_1 * r;
+ const double rm1 = r - 1;
+ return (a_n * g_np1 - x_1) / rm1 - d*(g_np1 - g_2) / (rm1 * rm1);
+ }
+};
+
+#endif