summary | refs | log | tree | commit | diff
path: root/training
diff options
context:
space:
mode:
Diffstat (limited to 'training')
-rw-r--r-- training/mpi_online_optimize.cc 17
-rw-r--r-- training/online_optimizer.h 23
-rw-r--r-- training/optimize_test.cc 2
3 files changed, 34 insertions, 8 deletions
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 325ba030..32033c19 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -64,6 +64,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
("input_weights,w",po::value<string>(),"Input feature weights file")
+ ("frozen_features,z",po::value<string>(), "List of features not to optimize")
("training_data,t",po::value<string>(),"Training data corpus")
("training_agenda,a",po::value<string>(), "Text file listing a series of configuration files and the number of iterations to train using each configuration successively")
("minibatch_size_per_proc,s", po::value<unsigned>()->default_value(5), "Number of training instances evaluated per processor in each minibatch")
@@ -254,6 +255,20 @@ int main(int argc, char** argv) {
if (conf.count("input_weights"))
weights.InitFromFile(conf["input_weights"].as<string>());
+ vector<int> frozen_fids;
+ if (conf.count("frozen_features")) {
+ ReadFile rf(conf["frozen_features"].as<string>());
+ istream& in = *rf.stream();
+ string line;
+ while(in) {
+ getline(in, line);
+ if (line.empty()) continue;
+ if (line[0] == ' ' || line[line.size() - 1] == ' ') { line = Trim(line); }
+ frozen_fids.push_back(FD::Convert(line));
+ }
+ if (rank == 0) cerr << "Freezing " << frozen_fids.size() << " features.\n";
+ }
+
vector<string> corpus;
vector<int> ids;
ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
@@ -284,7 +299,7 @@ int main(int argc, char** argv) {
const string omethod = conf["optimization_method"].as<string>();
if (omethod == "sgd") {
const double C = conf["regularization_strength"].as<double>();
- o.reset(new CumulativeL1OnlineOptimizer(lr, total_corpus_size, C));
+ o.reset(new CumulativeL1OnlineOptimizer(lr, total_corpus_size, C, frozen_fids));
} else {
assert(!"fail");
}
diff --git a/training/online_optimizer.h b/training/online_optimizer.h
index 312aabae..28d89344 100644
--- a/training/online_optimizer.h
+++ b/training/online_optimizer.h
@@ -2,6 +2,7 @@
#define _ONL_OPTIMIZE_H_
#include <tr1/memory>
+#include <set>
#include <string>
#include <cmath>
#include "sparse_vector.h"
@@ -56,8 +57,12 @@ class OnlineOptimizer {
public:
virtual ~OnlineOptimizer();
OnlineOptimizer(const std::tr1::shared_ptr<LearningRateSchedule>& s,
- size_t batch_size)
- : N_(batch_size),schedule_(s),k_() {}
+ size_t batch_size,
+ const std::vector<int>& frozen_feats = std::vector<int>())
+ : N_(batch_size),schedule_(s),k_() {
+ for (int i = 0; i < frozen_feats.size(); ++i)
+ frozen_.insert(frozen_feats[i]);
+ }
void ResetEpoch() { k_ = 0; ResetEpochImpl(); }
void UpdateWeights(const SparseVector<double>& approx_g, int max_feat, SparseVector<double>* weights) {
++k_;
@@ -69,6 +74,7 @@ class OnlineOptimizer {
virtual void ResetEpochImpl();
virtual void UpdateWeightsImpl(const double& eta, const SparseVector<double>& approx_g, int max_feat, SparseVector<double>* weights) = 0;
const size_t N_; // number of training instances per batch
+ std::set<int> frozen_; // frozen (non-optimizing) features
private:
std::tr1::shared_ptr<LearningRateSchedule> schedule_;
@@ -78,16 +84,21 @@ class OnlineOptimizer {
class CumulativeL1OnlineOptimizer : public OnlineOptimizer {
public:
CumulativeL1OnlineOptimizer(const std::tr1::shared_ptr<LearningRateSchedule>& s,
- size_t training_instances, double C) :
- OnlineOptimizer(s, training_instances), C_(C), u_() {}
+ size_t training_instances, double C,
+ const std::vector<int>& frozen) :
+ OnlineOptimizer(s, training_instances, frozen), C_(C), u_() {}
protected:
void ResetEpochImpl() { u_ = 0; }
void UpdateWeightsImpl(const double& eta, const SparseVector<double>& approx_g, int max_feat, SparseVector<double>* weights) {
u_ += eta * C_ / N_;
- (*weights) += eta * approx_g;
+ for (SparseVector<double>::const_iterator it = approx_g.begin();
+ it != approx_g.end(); ++it) {
+ if (frozen_.count(it->first) == 0)
+ weights->add_value(it->first, eta * it->second);
+ }
for (int i = 1; i < max_feat; ++i)
- ApplyPenalty(i, weights);
+ if (frozen_.count(i) == 0) ApplyPenalty(i, weights);
}
private:
diff --git a/training/optimize_test.cc b/training/optimize_test.cc
index 6fa5efd4..fe7ca70f 100644
--- a/training/optimize_test.cc
+++ b/training/optimize_test.cc
@@ -104,7 +104,7 @@ void TestOnline() {
double eta0 = 0.2;
shared_ptr<LearningRateSchedule> r(new ExponentialDecayLearningRate(N, eta0, 0.85));
//shared_ptr<LearningRateSchedule> r(new StandardLearningRate(N, eta0));
- CumulativeL1OnlineOptimizer opt(r, N, C);
+ CumulativeL1OnlineOptimizer opt(r, N, C, std::vector<int>());
assert(r->eta(10) < r->eta(1));
}