From 0269777fc54bc554c12107bdd5498f743df2a1ce Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Thu, 8 Sep 2011 00:06:52 +0200
Subject: a lot of stuff, fast_sparse_vector, perceptron, removed sofia, sample [...]

---
Review notes (this area, between "---" and the first "diff --git", is not
part of the commit message and is not part of any hunk):

* Recovery. This patch was recovered from a web rendering that had collapsed
  all line breaks and dropped every "<...>" token. Line breaks are restored
  so that each new file has exactly the number of lines stated in its hunk
  header (4, 22, 39 and 14). Indentation inside the hunks is reconstructed.

* Restored tokens. The following are inferred from how the code uses them,
  not read from the original -- TODO confirm against the repository:
    - "#include <vector>" in bin_class.h, log_reg.cc and log_reg.h
      (std::vector is used in all three);
    - "#include <cmath>" in log_reg.cc (log1p and exp are called); the
      relative order of the two standard includes there is a guess;
    - "std::vector<TrainingInstance>" (elements are accessed through
      .x_feature_map and .y);
    - "SparseVector<double>" (the result of dot() is stored in a double and
      the vector is scaled by the double returned from exp()).
  The author's e-mail address was dropped as well and is NOT reconstructed.

* What LogisticRegression::ObjectiveAndGradient does, as written below:
    - for each training instance i, d_i = x_feature_map.dot(x), and
      p(true | i) = 1 / (1 + exp(-d_i)); both log-probabilities are computed
      through log1p(exp(.)) on a non-positive argument;
    - the return value is the sum over instances of -log p(y_i | i);
    - for a true label, x_feature_map * p(false | i) is subtracted from *g;
      for a false label, x_feature_map * p(true | i) is added to *g;
    - *g is only accumulated into, never reset, and is dereferenced without
      a null check; there is no bias term (see the TODO in the code).

* Left unchanged on purpose, so the hunks still represent the commit:
  the loop compares an int index against training_instances.size()
  (signed/unsigned), and log_reg.cc has a file-scope "using namespace std;".

 dtrain/test/log_reg/bin_class.cc |  4 ++++
 dtrain/test/log_reg/bin_class.h  | 22 ++++++++++++++++++++++
 dtrain/test/log_reg/log_reg.cc   | 39 +++++++++++++++++++++++++++++++++++++++
 dtrain/test/log_reg/log_reg.h    | 14 ++++++++++++++
 4 files changed, 79 insertions(+)
 create mode 100644 dtrain/test/log_reg/bin_class.cc
 create mode 100644 dtrain/test/log_reg/bin_class.h
 create mode 100644 dtrain/test/log_reg/log_reg.cc
 create mode 100644 dtrain/test/log_reg/log_reg.h

(limited to 'dtrain/test/log_reg')

diff --git a/dtrain/test/log_reg/bin_class.cc b/dtrain/test/log_reg/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/log_reg/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}
+
diff --git a/dtrain/test/log_reg/bin_class.h b/dtrain/test/log_reg/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/log_reg/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef _BIN_CLASS_H_
+#define _BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {
+  // TODO add other info? loss for MIRA-type updates?
+  SparseVector<double> x_feature_map;
+  bool y;
+};
+
+struct Objective {
+  virtual ~Objective();
+
+  // returns f(x) and f'(x)
+  virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+                                      const std::vector<TrainingInstance>& training_instances,
+                                      SparseVector<double>* g) const = 0;
+};
+
+#endif
diff --git a/dtrain/test/log_reg/log_reg.cc b/dtrain/test/log_reg/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/log_reg/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
+                                                const vector<TrainingInstance>& training_instances,
+                                                SparseVector<double>* g) const {
+  double cll = 0;
+  for (int i = 0; i < training_instances.size(); ++i) {
+    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
+    double lp_false = dotprod;
+    double lp_true = -dotprod;
+    if (0 < lp_true) {
+      lp_true += log1p(exp(-lp_true));
+      lp_false = log1p(exp(lp_false));
+    } else {
+      lp_true = log1p(exp(lp_true));
+      lp_false += log1p(exp(-lp_false));
+    }
+    lp_true *= -1;
+    lp_false *= -1;
+    if (training_instances[i].y) { // true label
+      cll -= lp_true;
+      (*g) -= training_instances[i].x_feature_map * exp(lp_false);
+      // (*g)[0] -= exp(lp_false); // bias
+    } else { // false label
+      cll -= lp_false;
+      (*g) += training_instances[i].x_feature_map * exp(lp_true);
+      // g += corpus[i].second * exp(lp_true);
+    }
+  }
+  return cll;
+}
+
diff --git a/dtrain/test/log_reg/log_reg.h b/dtrain/test/log_reg/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/log_reg/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef _LOG_REG_H_
+#define _LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {
+  double ObjectiveAndGradient(const SparseVector<double>& x,
+                              const std::vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const;
+};
+
+#endif
-- 
cgit v1.2.3