From 5dbde8313ec3d8448be8bd1df13f0e1945b037de Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 10 Feb 2012 13:18:59 -0500 Subject: move atools to utils directory --- training/Makefile.am | 4 ---- 1 file changed, 4 deletions(-) (limited to 'training/Makefile.am') diff --git a/training/Makefile.am b/training/Makefile.am index 2a11ae52..d2f1ccc5 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -6,7 +6,6 @@ bin_PROGRAMS = \ mr_reduce_to_weights \ mr_optimize_reduce \ grammar_convert \ - atools \ plftools \ collapse_weights \ mpi_extract_reachable \ @@ -47,9 +46,6 @@ augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/lib test_ngram_SOURCES = test_ngram.cc test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz -atools_SOURCES = atools.cc -atools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz - model1_SOURCES = model1.cc ttables.cc model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz -- cgit v1.2.3 From 48874716843a5c5b023a397c4f3dc6e8ec63935e Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 18 Feb 2012 22:09:47 -0500 Subject: initial lbl_model stub --- training/Makefile.am | 4 ++ training/lbl_model.cc | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 training/lbl_model.cc (limited to 'training/Makefile.am') diff --git a/training/Makefile.am b/training/Makefile.am index d2f1ccc5..330341ac 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -1,5 +1,6 @@ bin_PROGRAMS = \ model1 \ + lbl_model \ test_ngram \ mr_em_map_adapter \ mr_em_adapted_reduce \ @@ -49,6 +50,9 @@ test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteva model1_SOURCES = model1.cc ttables.cc model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz +lbl_model_SOURCES = lbl_model.cc ttables.cc +lbl_model_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz + grammar_convert_SOURCES = grammar_convert.cc grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz diff --git a/training/lbl_model.cc b/training/lbl_model.cc new file mode 100644 index 00000000..72d80a56 --- /dev/null +++ b/training/lbl_model.cc @@ -0,0 +1,131 @@ +#include + +#include "config.h" +#ifndef HAVE_EIGEN + int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; } +#else + +#include + +#include +#include +#include + +#include "m.h" +#include "lattice.h" +#include "stringlib.h" +#include "filelib.h" +#include "tdict.h" + +namespace po = boost::program_options; +using namespace std; + +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { + po::options_description opts("Configuration options"); + opts.add_options() + ("iterations,i",po::value()->default_value(5),"Number of iterations of training") + ("diagonal_tension,T", po::value()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)") + ("testset,x", po::value(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model"); + po::options_description clo("Command line options"); + clo.add_options() + ("config", po::value(), "Configuration file") + ("help,h", "Print this help message and exit"); + po::options_description dconfig_options, dcmdline_options; + dconfig_options.add(opts); + dcmdline_options.add(opts).add(clo); + + po::store(parse_command_line(argc, argv, dcmdline_options), *conf); + if (conf->count("config")) { + ifstream config((*conf)["config"].as().c_str()); + po::store(po::parse_config_file(config, dconfig_options), *conf); + } + po::notify(*conf); + + if (argc < 2 || conf->count("help")) { + cerr << "Usage " << argv[0] << " [OPTIONS] corpus.fr-en\n"; + cerr << dcmdline_options << endl; + return false; + } + return true; +} + +int main(int argc, char** argv) { + po::variables_map conf; + if (!InitCommandLine(argc, argv, &conf)) return 1; + const string fname = argv[argc - 1]; + const int ITERATIONS = conf["iterations"].as(); + const double diagonal_tension = conf["diagonal_tension"].as(); + string testset; + if (conf.count("testset")) testset = conf["testset"].as(); + + double tot_len_ratio = 0; + double mean_srclen_multiplier = 0; + vector unnormed_a_i; + for (int iter = 0; iter < ITERATIONS; ++iter) { + cerr << "ITERATION " << (iter + 1) << endl; + ReadFile rf(fname); + istream& in = *rf.stream(); + double likelihood = 0; + double denom = 0.0; + int lc = 0; + bool flag = false; + string line; + string ssrc, strg; + while(true) { + getline(in, line); + if (!in) break; + ++lc; + if (lc % 1000 == 0) { cerr << '.'; flag = true; } + if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; } + ParseTranslatorInput(line, &ssrc, &strg); + Lattice src, trg; + LatticeTools::ConvertTextToLattice(ssrc, &src); + LatticeTools::ConvertTextToLattice(strg, &trg); + if (src.size() == 0 || trg.size() == 0) { + cerr << "Error: " << lc << "\n" << line << endl; + assert(src.size() > 0); + assert(trg.size() > 0); + } + if (src.size() > unnormed_a_i.size()) + unnormed_a_i.resize(src.size()); + if (iter == 0) + tot_len_ratio += static_cast(trg.size()) / static_cast(src.size()); + denom += trg.size(); + vector probs(src.size() + 1); + bool first_al = true; // used for write_alignments + for (int j = 0; j < trg.size(); ++j) { + const WordID& f_j = trg[j][0].label; + double sum = 0; + const double j_over_ts = double(j) / trg.size(); + double prob_a_i = 1.0 / src.size(); + double az = 0; + for (int ta = 0; ta < src.size(); ++ta) { + unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + az += unnormed_a_i[ta]; + } + for (int i = 1; i <= src.size(); ++i) { + prob_a_i = unnormed_a_i[i-1] / az; + probs[i] = 1; // tt.prob(src[i-1][0].label, f_j) * prob_a_i; + sum += probs[i]; + } + } + } + + // log(e) = 1.0 + double base2_likelihood = likelihood / log(2); + + if (flag) { cerr << endl; } + if (iter == 0) { + mean_srclen_multiplier = tot_len_ratio / lc; + cerr << "expected target length = source length * " << mean_srclen_multiplier << endl; + } + cerr << " log_e likelihood: " << likelihood << endl; + cerr << " log_2 likelihood: " << base2_likelihood << endl; + cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; + cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + } + return 0; +} + +#endif + -- cgit v1.2.3 From 6704c23f34940dde3951155fd77246bb6229ba95 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Tue, 21 Feb 2012 17:51:44 -0500 Subject: use lbfgs --- training/Makefile.am | 2 +- training/lbl_model.cc | 33 ++++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) (limited to 'training/Makefile.am') diff --git a/training/Makefile.am b/training/Makefile.am index 330341ac..991ac210 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -50,7 +50,7 @@ test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteva model1_SOURCES = model1.cc ttables.cc model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz -lbl_model_SOURCES = lbl_model.cc ttables.cc +lbl_model_SOURCES = lbl_model.cc optimize.cc lbl_model_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz grammar_convert_SOURCES = grammar_convert.cc diff --git a/training/lbl_model.cc b/training/lbl_model.cc index 4759eedc..eb3e194d 100644 --- a/training/lbl_model.cc +++ b/training/lbl_model.cc @@ -16,6 +16,7 @@ #include #include +#include "optimize.h" #include "array2d.h" #include "m.h" #include "lattice.h" @@ -26,7 +27,7 @@ namespace po = boost::program_options; using namespace std; -#define kDIMENSIONS 25 +#define kDIMENSIONS 8 typedef Eigen::Matrix RVector; typedef Eigen::Matrix RTVector; typedef Eigen::Matrix TMatrix; @@ -69,6 +70,21 @@ void Normalize(RVector* v) { *v /= norm; } +void Flatten(const TMatrix& m, vector* v) { + unsigned c = 0; + v->resize(kDIMENSIONS * kDIMENSIONS); + for (unsigned i = 0; i < kDIMENSIONS; ++i) + for (unsigned j = 0; j < kDIMENSIONS; ++j) + (*v)[c++] = m(i,j); +} + +void Unflatten(const vector& v, TMatrix* m) { + unsigned c = 0; + for (unsigned i = 0; i < kDIMENSIONS; ++i) + for (unsigned j = 0; j < kDIMENSIONS; ++j) + (*m)(i, j) = v[c++]; +} + int main(int argc, char** argv) { po::variables_map conf; if (!InitCommandLine(argc, argv, &conf)) return 1; @@ -76,7 +92,7 @@ int main(int argc, char** argv) { const int ITERATIONS = conf["iterations"].as(); const float eta = conf["eta"].as(); const double diagonal_tension = conf["diagonal_tension"].as(); - bool SGD = true; + bool SGD = false; if (diagonal_tension < 0.0) { cerr << "Invalid value for diagonal_tension: must be >= 0\n"; return 1; @@ -121,6 +137,7 @@ int main(int argc, char** argv) { cerr << "Number of target word types: " << vocab_e.size() << endl; const float num_examples = lc; + LBFGSOptimizer lbfgs(kDIMENSIONS * kDIMENSIONS, 100); r_trg.resize(TD::NumWords() + 1); r_src.resize(TD::NumWords() + 1); if (conf.count("random_seed")) { @@ -130,7 +147,7 @@ int main(int argc, char** argv) { cerr << "Random seed: " << seed << endl; srand(seed); } - TMatrix t = TMatrix::Random() / 100.0; + TMatrix t = TMatrix::Random() / 1024.0; for (unsigned i = 1; i < r_trg.size(); ++i) { r_trg[i] = RVector::Random(); r_src[i] = RVector::Random(); @@ -145,6 +162,8 @@ int main(int argc, char** argv) { TMatrix g; vector exp_src; vector z_src; + vector flat_g, flat_t; + Flatten(t, &flat_t); for (int iter = 0; iter < ITERATIONS; ++iter) { cerr << "ITERATION " << (iter + 1) << endl; ReadFile rf(fname); @@ -236,8 +255,6 @@ int main(int argc, char** argv) { if (SGD) { t -= g * eta / num_examples; g *= 0; - } else { - assert(!"not implemented"); } } @@ -250,6 +267,12 @@ int main(int argc, char** argv) { cerr << " log_2 likelihood: " << base2_likelihood << endl; cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; + if (!SGD) { + Flatten(g, &flat_g); + lbfgs.Optimize(-likelihood, flat_g, &flat_t); + Unflatten(flat_t, &t); + if (lbfgs.HasConverged()) break; + } cerr << t << endl; } cerr << "TRANSLATION MATRIX:" << endl << t << endl; -- cgit v1.2.3