Diffstat (limited to 'training')
-rw-r--r--  training/lbl_model.cc | 50
1 file changed, 41 insertions(+), 9 deletions(-)
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index eb3e194d..a114bba7 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -12,6 +12,7 @@
 #include <cstring> // memset
 #include <ctime>
 
+#include <boost/math/special_functions/fpclassify.hpp>
 #include <boost/program_options.hpp>
 #include <boost/program_options/variables_map.hpp>
 #include <Eigen/Dense>
@@ -27,7 +28,7 @@
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 8
+#define kDIMENSIONS 110
 typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
@@ -38,8 +39,9 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
   opts.add_options()
         ("input,i",po::value<string>(),"Input file")
         ("iterations,I",po::value<unsigned>()->default_value(1000),"Number of iterations of training")
+        ("regularization_strength,C",po::value<float>()->default_value(0.1),"L2 regularization strength (0 for no regularization)")
         ("eta,e", po::value<float>()->default_value(0.1f), "Eta for SGD")
-        ("random_seed", po::value<unsigned>(), "Random seed")
+        ("random_seed,s", po::value<unsigned>(), "Random seed")
         ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
         ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
   po::options_description clo("Command line options");
@@ -67,6 +69,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
 
 void Normalize(RVector* v) {
   float norm = v->norm();
+  assert(norm > 0.0f);
   *v /= norm;
 }
 
@@ -74,21 +77,42 @@ void Flatten(const TMatrix& m, vector<double>* v) {
   unsigned c = 0;
   v->resize(kDIMENSIONS * kDIMENSIONS);
   for (unsigned i = 0; i < kDIMENSIONS; ++i)
-    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j) {
+      assert(boost::math::isnormal(m(i, j)));
       (*v)[c++] = m(i,j);
+    }
 }
 
 void Unflatten(const vector<double>& v, TMatrix* m) {
   unsigned c = 0;
   for (unsigned i = 0; i < kDIMENSIONS; ++i)
-    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j) {
+      assert(boost::math::isnormal(v[c]));
       (*m)(i, j) = v[c++];
+    }
+}
+
+double ApplyRegularization(const double C,
+                           const vector<double>& weights,
+                           vector<double>* g) {
+  assert(weights.size() == g->size());
+  double reg = 0;
+  for (size_t i = 0; i < weights.size(); ++i) {
+    const double& w_i = weights[i];
+    double& g_i = (*g)[i];
+    reg += C * w_i * w_i;
+    g_i += 2 * C * w_i;
+  }
+  return reg;
 }
 
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
   const string fname = conf["input"].as<string>();
+  const float reg_strength = conf["regularization_strength"].as<float>();
+  const bool has_l2 = reg_strength;
+  assert(reg_strength >= 0.0f);
   const int ITERATIONS = conf["iterations"].as<unsigned>();
   const float eta = conf["eta"].as<float>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
@@ -147,7 +171,7 @@ int main(int argc, char** argv) {
     cerr << "Random seed: " << seed << endl;
     srand(seed);
   }
-  TMatrix t = TMatrix::Random() / 1024.0;
+  TMatrix t = TMatrix::Random() / 50.0;
   for (unsigned i = 1; i < r_trg.size(); ++i) {
     r_trg[i] = RVector::Random();
     r_src[i] = RVector::Random();
@@ -159,7 +183,7 @@ int main(int argc, char** argv) {
   vector<set<unsigned> > trg_pos(TD::NumWords() + 1);
 
   // do optimization
-  TMatrix g;
+  TMatrix g = TMatrix::Zero();
   vector<TMatrix> exp_src;
   vector<double> z_src;
   vector<double> flat_g, flat_t;
@@ -265,11 +289,19 @@ int main(int argc, char** argv) {
     const double base2_likelihood = likelihood / log(2);
     cerr << "  log_e likelihood: " << likelihood << endl;
     cerr << "  log_2 likelihood: " << base2_likelihood << endl;
-    cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
-    cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+    cerr << "     cross entropy: " << (-base2_likelihood / denom) << endl;
+    cerr << "        perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
     if (!SGD) {
       Flatten(g, &flat_g);
-      lbfgs.Optimize(-likelihood, flat_g, &flat_t);
+      double obj = -likelihood;
+      if (has_l2) {
+        const double r = ApplyRegularization(reg_strength,
+                                             flat_t,
+                                             &flat_g);
+        obj += r;
+        cerr << "    regularization: " << r << endl;
+      }
+      lbfgs.Optimize(obj, flat_g, &flat_t);
       Unflatten(flat_t, &t);
       if (lbfgs.HasConverged()) break;
     }
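Note on the new regularizer: ApplyRegularization adds the squared-L2 penalty C * sum_i w_i^2 to the L-BFGS objective and accumulates its gradient 2 * C * w_i into flat_g before lbfgs.Optimize is called; with -C 0 the has_l2 flag is false and the penalty is skipped entirely. The following is a minimal standalone sketch, not part of the patch, that reuses the function as written above and checks its analytic gradient against a finite-difference estimate on toy values.

// Standalone sketch: verify the L2 penalty and its gradient from the patch.
#include <cassert>
#include <cstdio>
#include <vector>

using namespace std;

// Same contract as the patched ApplyRegularization: returns the penalty
// C * sum_i w_i^2 and accumulates 2 * C * w_i into *g.
double ApplyRegularization(const double C,
                           const vector<double>& weights,
                           vector<double>* g) {
  assert(weights.size() == g->size());
  double reg = 0;
  for (size_t i = 0; i < weights.size(); ++i) {
    reg += C * weights[i] * weights[i];
    (*g)[i] += 2 * C * weights[i];
  }
  return reg;
}

int main() {
  const double C = 0.1;                 // plays the role of regularization_strength
  vector<double> w;                     // toy stand-in for flat_t
  w.push_back(0.5); w.push_back(-2.0); w.push_back(3.0);
  vector<double> g(w.size(), 0.0);      // toy stand-in for flat_g (gradient so far)

  const double reg = ApplyRegularization(C, w, &g);
  printf("penalty = %g\n", reg);        // 0.1 * (0.25 + 4 + 9) = 1.325

  // Finite-difference check of the analytic gradient 2 * C * w_i.
  const double eps = 1e-6;
  for (size_t i = 0; i < w.size(); ++i) {
    vector<double> wp(w), wm(w), dummy(w.size(), 0.0);
    wp[i] += eps; wm[i] -= eps;
    const double fd = (ApplyRegularization(C, wp, &dummy) -
                       ApplyRegularization(C, wm, &dummy)) / (2 * eps);
    printf("dim %u: analytic %g  finite-diff %g\n", (unsigned)i, g[i], fd);
  }
  return 0;
}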
