diff options
Diffstat (limited to 'training')
-rw-r--r--  training/mpi_online_optimize.cc | 17
-rw-r--r--  training/online_optimizer.h     | 23
-rw-r--r--  training/optimize_test.cc       |  2
3 files changed, 34 insertions, 8 deletions
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 325ba030..32033c19 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -64,6 +64,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   po::options_description opts("Configuration options");
   opts.add_options()
         ("input_weights,w",po::value<string>(),"Input feature weights file")
+        ("frozen_features,z",po::value<string>(), "List of features not to optimize")
         ("training_data,t",po::value<string>(),"Training data corpus")
         ("training_agenda,a",po::value<string>(), "Text file listing a series of configuration files and the number of iterations to train using each configuration successively")
         ("minibatch_size_per_proc,s", po::value<unsigned>()->default_value(5), "Number of training instances evaluated per processor in each minibatch")
@@ -254,6 +255,20 @@ int main(int argc, char** argv) {
   if (conf.count("input_weights"))
     weights.InitFromFile(conf["input_weights"].as<string>());
 
+  vector<int> frozen_fids;
+  if (conf.count("frozen_features")) {
+    ReadFile rf(conf["frozen_features"].as<string>());
+    istream& in = *rf.stream();
+    string line;
+    while(in) {
+      getline(in, line);
+      if (line.empty()) continue;
+      if (line[0] == ' ' || line[line.size() - 1] == ' ') { line = Trim(line); }
+      frozen_fids.push_back(FD::Convert(line));
+    }
+    if (rank == 0) cerr << "Freezing " << frozen_fids.size() << " features.\n";
+  }
+
   vector<string> corpus;
   vector<int> ids;
   ReadTrainingCorpus(conf["training_data"].as<string>(), rank, size, &corpus, &ids);
@@ -284,7 +299,7 @@ int main(int argc, char** argv) {
     const string omethod = conf["optimization_method"].as<string>();
     if (omethod == "sgd") {
       const double C = conf["regularization_strength"].as<double>();
-      o.reset(new CumulativeL1OnlineOptimizer(lr, total_corpus_size, C));
+      o.reset(new CumulativeL1OnlineOptimizer(lr, total_corpus_size, C, frozen_fids));
     } else {
       assert(!"fail");
     }
diff --git a/training/online_optimizer.h b/training/online_optimizer.h
index 312aabae..28d89344 100644
--- a/training/online_optimizer.h
+++ b/training/online_optimizer.h
@@ -2,6 +2,7 @@
 #define _ONL_OPTIMIZE_H_
 
 #include <tr1/memory>
+#include <set>
 #include <string>
 #include <cmath>
 #include "sparse_vector.h"
@@ -56,8 +57,12 @@ class OnlineOptimizer {
  public:
   virtual ~OnlineOptimizer();
   OnlineOptimizer(const std::tr1::shared_ptr<LearningRateSchedule>& s,
-                  size_t batch_size)
-    : N_(batch_size),schedule_(s),k_() {}
+                  size_t batch_size,
+                  const std::vector<int>& frozen_feats = std::vector<int>())
+      : N_(batch_size),schedule_(s),k_() {
+    for (int i = 0; i < frozen_feats.size(); ++i)
+      frozen_.insert(frozen_feats[i]);
+  }
   void ResetEpoch() { k_ = 0; ResetEpochImpl(); }
   void UpdateWeights(const SparseVector<double>& approx_g, int max_feat, SparseVector<double>* weights) {
     ++k_;
@@ -69,6 +74,7 @@ class OnlineOptimizer {
   virtual void ResetEpochImpl();
   virtual void UpdateWeightsImpl(const double& eta, const SparseVector<double>& approx_g, int max_feat, SparseVector<double>* weights) = 0;
   const size_t N_; // number of training instances per batch
+  std::set<int> frozen_;  // frozen (non-optimizing) features
 
  private:
   std::tr1::shared_ptr<LearningRateSchedule> schedule_;
@@ -78,16 +84,21 @@ class OnlineOptimizer {
 class CumulativeL1OnlineOptimizer : public OnlineOptimizer {
  public:
   CumulativeL1OnlineOptimizer(const std::tr1::shared_ptr<LearningRateSchedule>& s,
-                              size_t training_instances, double C) :
-    OnlineOptimizer(s, training_instances), C_(C), u_() {}
+                              size_t training_instances, double C,
+                              const std::vector<int>& frozen) :
+    OnlineOptimizer(s, training_instances, frozen), C_(C), u_() {}
 
  protected:
   void ResetEpochImpl() { u_ = 0; }
   void UpdateWeightsImpl(const double& eta, const SparseVector<double>& approx_g, int max_feat, SparseVector<double>* weights) {
     u_ += eta * C_ / N_;
-    (*weights) += eta * approx_g;
+    for (SparseVector<double>::const_iterator it = approx_g.begin(); 
+         it != approx_g.end(); ++it) {
+      if (frozen_.count(it->first) == 0)
+        weights->add_value(it->first, eta * it->second);
+    }
     for (int i = 1; i < max_feat; ++i)
-      ApplyPenalty(i, weights);
+      if (frozen_.count(i) == 0) ApplyPenalty(i, weights);
   }
 
  private:
diff --git a/training/optimize_test.cc b/training/optimize_test.cc
index 6fa5efd4..fe7ca70f 100644
--- a/training/optimize_test.cc
+++ b/training/optimize_test.cc
@@ -104,7 +104,7 @@ void TestOnline() {
   double eta0 = 0.2;
   shared_ptr<LearningRateSchedule> r(new ExponentialDecayLearningRate(N, eta0, 0.85));
   //shared_ptr<LearningRateSchedule> r(new StandardLearningRate(N, eta0));
-  CumulativeL1OnlineOptimizer opt(r, N, C);
+  CumulativeL1OnlineOptimizer opt(r, N, C, std::vector<int>());
   assert(r->eta(10) < r->eta(1));
 }
 
