diff options
Diffstat (limited to 'dtrain/test/log_reg')
| -rw-r--r-- | dtrain/test/log_reg/bin_class.cc | 4 | ||||
| -rw-r--r-- | dtrain/test/log_reg/bin_class.h | 22 | ||||
| -rw-r--r-- | dtrain/test/log_reg/log_reg.cc | 39 | ||||
| -rw-r--r-- | dtrain/test/log_reg/log_reg.h | 14 | 
4 files changed, 79 insertions, 0 deletions
diff --git a/dtrain/test/log_reg/bin_class.cc b/dtrain/test/log_reg/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/log_reg/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}
+
diff --git a/dtrain/test/log_reg/bin_class.h b/dtrain/test/log_reg/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/log_reg/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef _BIN_CLASS_H_
+#define _BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {
+  // TODO add other info? loss for MIRA-type updates?
+  SparseVector<double> x_feature_map;
+  bool y;
+};
+
+struct Objective {
+  virtual ~Objective();
+
+  // returns f(x) and f'(x)
+  virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+                  const std::vector<TrainingInstance>& training_instances,
+                  SparseVector<double>* g) const = 0;
+};
+
+#endif
diff --git a/dtrain/test/log_reg/log_reg.cc b/dtrain/test/log_reg/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/log_reg/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
+                              const vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const {
+  double cll = 0;
+  for (int i = 0; i < training_instances.size(); ++i) {
+    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
+    double lp_false = dotprod;
+    double lp_true = -dotprod;
+    if (0 < lp_true) {
+      lp_true += log1p(exp(-lp_true));
+      lp_false = log1p(exp(lp_false));
+    } else {
+      lp_true = log1p(exp(lp_true));
+      lp_false += log1p(exp(-lp_false));
+    }
+    lp_true *= -1;
+    lp_false *= -1;
+    if (training_instances[i].y) {  // true label
+      cll -= lp_true;
+      (*g) -= training_instances[i].x_feature_map * exp(lp_false);
+      // (*g)[0] -= exp(lp_false); // bias
+    } else {                  // false label
+      cll -= lp_false;
+      (*g) += training_instances[i].x_feature_map * exp(lp_true);
+      // g += corpus[i].second * exp(lp_true);
+    }
+  }
+  return cll;
+}
+
diff --git a/dtrain/test/log_reg/log_reg.h b/dtrain/test/log_reg/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/log_reg/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef _LOG_REG_H_
+#define _LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {
+  double ObjectiveAndGradient(const SparseVector<double>& x,
+                              const std::vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const;
+};
+
+#endif
