summaryrefslogtreecommitdiff
path: root/word-aligner/da.h
diff options
context:
space:
mode:
authorAvneesh Saluja <asaluja@gmail.com>2013-03-28 18:28:16 -0700
committerAvneesh Saluja <asaluja@gmail.com>2013-03-28 18:28:16 -0700
commit3d8d656fa7911524e0e6885647173474524e0784 (patch)
tree81b1ee2fcb67980376d03f0aa48e42e53abff222 /word-aligner/da.h
parentbe7f57fdd484e063775d7abf083b9fa4c403b610 (diff)
parent96fedabebafe7a38a6d5928be8fff767e411d705 (diff)
fixed conflicts
Diffstat (limited to 'word-aligner/da.h')
-rw-r--r--word-aligner/da.h79
1 files changed, 79 insertions, 0 deletions
diff --git a/word-aligner/da.h b/word-aligner/da.h
new file mode 100644
index 00000000..c979b641
--- /dev/null
+++ b/word-aligner/da.h
@@ -0,0 +1,79 @@
+#ifndef _DA_H_
+#define _DA_H_
+
+#include <cmath>
+#include <cassert>
+
+// m = trg len
+// n = src len
+// i = trg index
+// j = src index
+struct DiagonalAlignment {
+
+ static double UnnormalizedProb(const unsigned i, const unsigned j, const unsigned m, const unsigned n, const double alpha) {
+#if 0
+ assert(i > 0);
+ assert(n > 0);
+ assert(m >= i);
+ assert(n >= j);
+#endif
+ return exp(Feature(i, j, m, n) * alpha);
+ }
+
+ static double ComputeZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) {
+#if 0
+ assert(i > 0);
+ assert(n > 0);
+ assert(m >= i);
+#endif
+ const double split = double(i) * n / m;
+ const unsigned floor = split;
+ unsigned ceil = floor + 1;
+ const double ratio = exp(-alpha / n);
+ const unsigned num_top = n - floor;
+ double ezt = 0;
+ double ezb = 0;
+ if (num_top)
+ ezt = UnnormalizedProb(i, ceil, m, n, alpha) * (1.0 - pow(ratio, num_top)) / (1.0 - ratio);
+ if (floor)
+ ezb = UnnormalizedProb(i, floor, m, n, alpha) * (1.0 - pow(ratio, floor)) / (1.0 - ratio);
+ return ezb + ezt;
+ }
+
+ static double ComputeDLogZ(const unsigned i, const unsigned m, const unsigned n, const double alpha) {
+ const double z = ComputeZ(i, n, m, alpha);
+ const double split = double(i) * n / m;
+ const unsigned floor = split;
+ const unsigned ceil = floor + 1;
+ const double ratio = exp(-alpha / n);
+ const double d = -1.0 / n;
+ const unsigned num_top = n - floor;
+ double pct = 0;
+ double pcb = 0;
+ if (num_top) {
+ pct = arithmetico_geometric_series(Feature(i, ceil, m, n), UnnormalizedProb(i, ceil, m, n, alpha), ratio, d, num_top);
+ //cerr << "PCT = " << pct << endl;
+ }
+ if (floor) {
+ pcb = arithmetico_geometric_series(Feature(i, floor, m, n), UnnormalizedProb(i, floor, m, n, alpha), ratio, d, floor);
+ //cerr << "PCB = " << pcb << endl;
+ }
+ return (pct + pcb) / z;
+ }
+
+ inline static double Feature(const unsigned i, const unsigned j, const unsigned m, const unsigned n) {
+ return -fabs(double(j) / n - double(i) / m);
+ }
+
+ private:
+ inline static double arithmetico_geometric_series(const double a_1, const double g_1, const double r, const double d, const unsigned n) {
+ const double g_np1 = g_1 * pow(r, n);
+ const double a_n = d * (n - 1) + a_1;
+ const double x_1 = a_1 * g_1;
+ const double g_2 = g_1 * r;
+ const double rm1 = r - 1;
+ return (a_n * g_np1 - x_1) / rm1 - d*(g_np1 - g_2) / (rm1 * rm1);
+ }
+};
+
+#endif