diff options
Diffstat (limited to 'dtrain/test/logreg_cd')
-rw-r--r-- | dtrain/test/logreg_cd/bin_class.cc | 4 | ||||
-rw-r--r-- | dtrain/test/logreg_cd/bin_class.h | 22 | ||||
-rw-r--r-- | dtrain/test/logreg_cd/log_reg.cc | 39 | ||||
-rw-r--r-- | dtrain/test/logreg_cd/log_reg.h | 14 |
4 files changed, 79 insertions, 0 deletions
diff --git a/dtrain/test/logreg_cd/bin_class.cc b/dtrain/test/logreg_cd/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}  // out-of-line definition of the virtual destructor declared in bin_class.h
+
diff --git a/dtrain/test/logreg_cd/bin_class.h b/dtrain/test/logreg_cd/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef LOGREG_CD_BIN_CLASS_H_
+#define LOGREG_CD_BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {  // one labeled example for a binary classifier
+  // TODO add other info? loss for MIRA-type updates?
+  SparseVector<double> x_feature_map;  // features of the example
+  bool y;                              // binary label of the example
+};
+
+struct Objective {  // interface: an objective evaluated over a set of training instances
+  virtual ~Objective();
+
+  // Returns f(x) at parameters x; f'(x) is reported through g (LogisticRegression adds it into *g).
+  virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+                  const std::vector<TrainingInstance>& training_instances,
+                  SparseVector<double>* g) const = 0;
+};
+
+#endif  // LOGREG_CD_BIN_CLASS_H_
diff --git a/dtrain/test/logreg_cd/log_reg.cc b/dtrain/test/logreg_cd/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+// Logistic loss: returns the summed negative log-likelihood of the labels at weights x and adds its gradient into *g.
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,  // x: weight vector
+              const vector<TrainingInstance>& training_instances,  // NOTE(review): math assumes dot() is the inner product and operator* scales
+              SparseVector<double>* g) const {  // g is dereferenced unconditionally and never cleared here
+  double cll = 0;  // accumulates -log P(y_i | instance i); despite the name it is the negated log-likelihood
+  for (vector<TrainingInstance>::size_type i = 0; i < training_instances.size(); ++i) {  // index type matches size()
+    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
+    double lp_false = dotprod;
+    double lp_true = -dotprod;
+    if (0 < lp_true) {  // pick the form in which exp() only ever sees a non-positive argument
+      lp_true += log1p(exp(-lp_true));  // = log(1 + exp(-dotprod))
+      lp_false = log1p(exp(lp_false));  // = log(1 + exp(dotprod))
+    } else {
+      lp_true = log1p(exp(lp_true));      // = log(1 + exp(-dotprod))
+      lp_false += log1p(exp(-lp_false));  // = log(1 + exp(dotprod))
+    }
+    lp_true *= -1;   // now log sigmoid(dotprod), i.e. log P(true)
+    lp_false *= -1;  // now log(1 - sigmoid(dotprod)), i.e. log P(false)
+    if (training_instances[i].y) {  // true label
+      cll -= lp_true;
+      (*g) -= training_instances[i].x_feature_map * exp(lp_false);  // gradient of -log P(true): -P(false) * features
+      // (*g)[0] -= exp(lp_false); // bias
+    } else {                  // false label
+      cll -= lp_false;
+      (*g) += training_instances[i].x_feature_map * exp(lp_true);  // gradient of -log P(false): +P(true) * features
+      // g += corpus[i].second * exp(lp_true);
+    }
+  }
+  return cll;
+}
+
diff --git a/dtrain/test/logreg_cd/log_reg.h b/dtrain/test/logreg_cd/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef LOGREG_CD_LOG_REG_H_
+#define LOGREG_CD_LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {  // logistic-regression implementation of Objective
+  double ObjectiveAndGradient(const SparseVector<double>& x,
+              const std::vector<TrainingInstance>& training_instances,
+              SparseVector<double>* g) const;  // implements Objective::ObjectiveAndGradient
+};
+
+#endif  // LOGREG_CD_LOG_REG_H_