4 files changed, 79 insertions, 0 deletions
diff --git a/dtrain/test/logreg_cd/bin_class.cc b/dtrain/test/logreg_cd/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}  // out-of-line (empty) definition of the virtual destructor declared in bin_class.h
+
diff --git a/dtrain/test/logreg_cd/bin_class.h b/dtrain/test/logreg_cd/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef _BIN_CLASS_H_
+#define _BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {  // one labeled example: a sparse feature vector plus a boolean label
+  // TODO add other info? loss for MIRA-type updates?
+  SparseVector<double> x_feature_map;  // sparse feature vector of this example
+  bool y;  // label of this example (true or false class); no default value is set here
+};
+
+struct Objective {  // abstract interface: its only operation is pure virtual
+  virtual ~Objective();  // virtual destructor, defined in bin_class.cc
+  // Evaluates the objective at the point x over training_instances.
+  // returns f(x) and f'(x): f(x) is the return value, f'(x) is handed back through g
+  virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+                  const std::vector<TrainingInstance>& training_instances,
+                  SparseVector<double>* g) const = 0;
+};
+
+#endif
diff --git a/dtrain/test/logreg_cd/log_reg.cc b/dtrain/test/logreg_cd/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+
+// Returns the negative conditional log-likelihood of the labels under weights x
+// and adds its gradient with respect to x into *g (*g is not cleared here).
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
+                              const vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const {
+  double cll = 0;
+  for (size_t i = 0; i < training_instances.size(); ++i) {  // size_t matches the unsigned size()
+    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias; if bias, add x[0] here and update (*g)[0] in both branches
+    double lp_false = dotprod;
+    double lp_true = -dotprod;
+    if (0 < lp_true) {  // dotprod < 0: exp() below is only applied to dotprod itself
+      lp_true += log1p(exp(-lp_true));
+      lp_false = log1p(exp(lp_false));
+    } else {  // dotprod >= 0: exp() below is only applied to -dotprod, so it cannot overflow
+      lp_true = log1p(exp(lp_true));
+      lp_false += log1p(exp(-lp_false));
+    }
+    lp_true *= -1;   // lp_true  = log(1 / (1 + exp(-dotprod))), the log-probability of the true label
+    lp_false *= -1;  // lp_false = log(1 / (1 + exp(dotprod))), the log-probability of the false label
+    if (training_instances[i].y) {  // true label
+      cll -= lp_true;
+      (*g) -= training_instances[i].x_feature_map * exp(lp_false);  // gradient of -lp_true w.r.t. x
+    } else {                  // false label
+      cll -= lp_false;
+      (*g) += training_instances[i].x_feature_map * exp(lp_true);  // gradient of -lp_false w.r.t. x
+    }
+  }
+  return cll;
+}
+
diff --git a/dtrain/test/logreg_cd/log_reg.h b/dtrain/test/logreg_cd/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef _LOG_REG_H_
+#define _LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {  // logistic-regression Objective; member defined in log_reg.cc
+  double ObjectiveAndGradient(const SparseVector<double>& x,  // implements the pure virtual declared in Objective
+                              const std::vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const;  // g receives the gradient; return value is the objective
+};
+
+#endif