finalized merge

author: Patrick Simianer <p@simianer.de> 2011-10-20 02:31:25 +0200
committer: Patrick Simianer <p@simianer.de> 2011-10-20 02:31:25 +0200
commit: a5a92ebe23c5819ed104313426012011e32539da (patch)
tree: 3416818c758d5ece4e71fe522c571e75ea04f100 /gi/pf/base_measures.h
parent: b88332caac2cbe737c99b8098813f868ca876d8b (diff)
parent: 78baccbb4231bb84a456702d4f574f8e601a8182 (diff)
1 files changed, 116 insertions, 0 deletions
diff --git a/gi/pf/base_measures.h b/gi/pf/base_measures.h
new file mode 100644
index 00000000..df17aa62
--- /dev/null
+++ b/gi/pf/base_measures.h
@@ -0,0 +1,116 @@
+#ifndef _BASE_MEASURES_H_
+#define _BASE_MEASURES_H_
+
+#include <vector>
+#include <map>
+#include <string>
+#include <cmath>
+#include <iostream>
+
+#include "trule.h"
+#include "prob.h"
+#include "tdict.h"
+
+inline double log_poisson(unsigned x, const double& lambda) {
+  assert(lambda > 0.0);
+  return log(lambda) * x - lgamma(x + 1) - lambda;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const std::vector<WordID>& p) {
+  os << '[';
+  for (int i = 0; i < p.size(); ++i)
+    os << (i==0 ? "" : " ") << TD::Convert(p[i]);
+  return os << ']';
+}
+
+struct Model1 {
+  explicit Model1(const std::string& fname) :
+      kNULL(TD::Convert("<eps>")),
+      kZERO() {
+    LoadModel1(fname);
+  }
+
+  void LoadModel1(const std::string& fname);
+
+  // returns prob 0 if src or trg is not found
+  const prob_t& operator()(WordID src, WordID trg) const {
+    if (src == 0) src = kNULL;
+    if (src < ttable.size()) {
+      const std::map<WordID, prob_t>& cpd = ttable[src];
+      const std::map<WordID, prob_t>::const_iterator it = cpd.find(trg);
+      if (it != cpd.end())
+        return it->second;
+    }
+    return kZERO;
+  }
+
+  const WordID kNULL;
+  const prob_t kZERO;
+  std::vector<std::map<WordID, prob_t> > ttable;
+};
+
+struct PhraseConditionalBase {
+  explicit PhraseConditionalBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size) :
+      model1(m1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
+    assert(vocab_e_size > 0);
+  }
+
+  // return p0 of rule.e_ | rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
+  const prob_t kUNIFORM_TARGET;
+};
+
+struct PhraseJointBase {
+  explicit PhraseJointBase(const Model1& m1, const double m1mixture, const unsigned vocab_e_size, const unsigned vocab_f_size) :
+      model1(m1),
+      kM1MIXTURE(m1mixture),
+      kUNIFORM_MIXTURE(1.0 - m1mixture),
+      kUNIFORM_SOURCE(1.0 / vocab_f_size),
+      kUNIFORM_TARGET(1.0 / vocab_e_size) {
+    assert(m1mixture >= 0.0 && m1mixture <= 1.0);
+    assert(vocab_e_size > 0);
+  }
+
+  // return p0 of rule.e_ | rule.f_
+  prob_t operator()(const TRule& rule) const {
+    return p0(rule.f_, rule.e_, 0, 0);
+  }
+
+  prob_t p0(const std::vector<WordID>& vsrc, const std::vector<WordID>& vtrg, int start_src, int start_trg) const;
+
+  const Model1& model1;
+  const prob_t kM1MIXTURE;  // Model 1 mixture component
+  const prob_t kUNIFORM_MIXTURE; // uniform mixture component
+  const prob_t kUNIFORM_SOURCE;
+  const prob_t kUNIFORM_TARGET;
+};
+
+// base distribution for jump size multinomials
+// basically p(0) = 0 and then, p(1) is max, and then
+// you drop as you move to the max jump distance
+struct JumpBase {
+  JumpBase();
+
+  const prob_t& operator()(int jump, unsigned src_len) const {
+    assert(jump != 0);
+    const std::map<int, prob_t>::const_iterator it = p[src_len].find(jump);
+    assert(it != p[src_len].end());
+    return it->second;
+  }
+  std::vector<std::map<int, prob_t> > p;
+};
+
+
+#endif
author	Patrick Simianer <p@simianer.de>	2011-10-20 02:31:25 +0200
committer	Patrick Simianer <p@simianer.de>	2011-10-20 02:31:25 +0200
commit	a5a92ebe23c5819ed104313426012011e32539da (patch)
tree	3416818c758d5ece4e71fe522c571e75ea04f100 /gi/pf/base_measures.h
parent	b88332caac2cbe737c99b8098813f868ca876d8b (diff)
parent	78baccbb4231bb84a456702d4f574f8e601a8182 (diff)