From 5dbde8313ec3d8448be8bd1df13f0e1945b037de Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Fri, 10 Feb 2012 13:18:59 -0500
Subject: move atools to utils directory

---
 training/Makefile.am | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'training/Makefile.am')
diff --git a/training/Makefile.am b/training/Makefile.am
index 2a11ae52..d2f1ccc5 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -6,7 +6,6 @@ bin_PROGRAMS = \
   mr_reduce_to_weights \
   mr_optimize_reduce \
   grammar_convert \
-  atools \
   plftools \
   collapse_weights \
   mpi_extract_reachable \
@@ -47,9 +46,6 @@ augment_grammar_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/lib
 test_ngram_SOURCES = test_ngram.cc
 test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a ../klm/lm/libklm.a ../klm/util/libklm_util.a -lz
 
-atools_SOURCES = atools.cc
-atools_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
-
 model1_SOURCES = model1.cc ttables.cc
 model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
-- 
cgit v1.2.3


From 48874716843a5c5b023a397c4f3dc6e8ec63935e Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Sat, 18 Feb 2012 22:09:47 -0500
Subject: initial lbl_model stub

---
 training/Makefile.am  |   4 ++
 training/lbl_model.cc | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+)
 create mode 100644 training/lbl_model.cc

(limited to 'training/Makefile.am')

diff --git a/training/Makefile.am b/training/Makefile.am
index d2f1ccc5..330341ac 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -1,5 +1,6 @@
 bin_PROGRAMS = \
   model1 \
+  lbl_model \
   test_ngram \
   mr_em_map_adapter \
   mr_em_adapted_reduce \
@@ -49,6 +50,9 @@ test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteva
 model1_SOURCES = model1.cc ttables.cc
 model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
+lbl_model_SOURCES = lbl_model.cc ttables.cc
+lbl_model_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
+
 grammar_convert_SOURCES = grammar_convert.cc
 grammar_convert_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
new file mode 100644
index 00000000..72d80a56
--- /dev/null
+++ b/training/lbl_model.cc
@@ -0,0 +1,131 @@
+#include <iostream>
+
+#include "config.h"
+#ifndef HAVE_EIGEN
+  int main() { std::cerr << "Please rebuild with --with-eigen PATH\n"; return 1; }  // stub entry point used when HAVE_EIGEN is not defined: prints a hint and exits with status 1
+#else
+
+#include <cmath>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+#include <Eigen/Dense>
+
+#include "m.h"
+#include "lattice.h"
+#include "stringlib.h"
+#include "filelib.h"
+#include "tdict.h"
+
+namespace po = boost::program_options;
+using namespace std;
+
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {  // fills *conf from argv (and an optional config file); returns false after printing usage
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("iterations,i",po::value<unsigned>()->default_value(5),"Number of iterations of training")
+        ("diagonal_tension,T", po::value<double>()->default_value(4.0), "How sharp or flat around the diagonal is the alignment distribution (0 = uniform, >0 sharpens)")
+        ("testset,x", po::value<string>(), "After training completes, compute the log likelihood of this set of sentence pairs under the learned model");
+  po::options_description clo("Command line options");
+  clo.add_options()
+        ("config", po::value<string>(), "Configuration file")
+        ("help,h", "Print this help message and exit");
+  po::options_description dconfig_options, dcmdline_options;
+  dconfig_options.add(opts);  // config file: shared options only
+  dcmdline_options.add(opts).add(clo);  // command line: shared options plus --config / --help
+  // Command-line values are stored first, then values from the --config file if one was named.
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("config")) {
+    ifstream config((*conf)["config"].as<string>().c_str());
+    po::store(po::parse_config_file(config, dconfig_options), *conf);
+  }
+  po::notify(*conf);
+  // Usage is printed (and false returned) when no arguments were given or --help was requested.
+  if (argc < 2 || conf->count("help")) {
+    cerr << "Usage " << argv[0] << " [OPTIONS] corpus.fr-en\n";
+    cerr << dcmdline_options << endl;
+    return false;
+  }
+  return true;
+}
+
+int main(int argc, char** argv) {  // stub driver: re-reads the corpus once per iteration and prints likelihood statistics to stderr
+  po::variables_map conf;
+  if (!InitCommandLine(argc, argv, &conf)) return 1;
+  const string fname = argv[argc - 1];  // corpus path is taken from the last command-line argument
+  const int ITERATIONS = conf["iterations"].as<unsigned>();
+  const double diagonal_tension = conf["diagonal_tension"].as<double>();
+  string testset;
+  if (conf.count("testset")) testset = conf["testset"].as<string>();
+  // State that outlives a single iteration; tot_len_ratio is only accumulated while iter == 0.
+  double tot_len_ratio = 0;
+  double mean_srclen_multiplier = 0;
+  vector<double> unnormed_a_i;  // scratch buffer, grown to the longest source sentence seen
+  for (int iter = 0; iter < ITERATIONS; ++iter) {
+    cerr << "ITERATION " << (iter + 1) << endl;
+    ReadFile rf(fname);
+    istream& in = *rf.stream();
+    double likelihood = 0;
+    double denom = 0.0;  // total number of target positions read in this iteration
+    int lc = 0;  // number of lines read in this iteration
+    bool flag = false;  // true while progress dots have been printed without a closing newline
+    string line;
+    string ssrc, strg;
+    while(true) {
+      getline(in, line);
+      if (!in) break;
+      ++lc;
+      if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+      if (lc %50000 == 0) { cerr << " [" << lc << "]\n" << flush; flag = false; }
+      ParseTranslatorInput(line, &ssrc, &strg);
+      Lattice src, trg;
+      LatticeTools::ConvertTextToLattice(ssrc, &src);
+      LatticeTools::ConvertTextToLattice(strg, &trg);
+      if (src.size() == 0 || trg.size() == 0) {
+        cerr << "Error: " << lc << "\n" << line << endl;
+        assert(src.size() > 0);  // NOTE(review): if asserts are compiled out (NDEBUG), an empty src reaches the floating-point divisions by src.size() below
+        assert(trg.size() > 0);
+      }
+      if (src.size() > unnormed_a_i.size())
+        unnormed_a_i.resize(src.size());
+      if (iter == 0)
+        tot_len_ratio += static_cast<double>(trg.size()) / static_cast<double>(src.size());
+      denom += trg.size();
+      vector<double> probs(src.size() + 1);  // written at indices 1..src.size() below; slot 0 is not written in this stub
+      bool first_al = true;  // used for write_alignments
+      for (int j = 0; j < trg.size(); ++j) {
+        const WordID& f_j = trg[j][0].label;  // only referenced by the commented-out tt.prob call further down
+        double sum = 0;  // accumulated below but not read afterwards in this stub
+        const double j_over_ts = double(j) / trg.size();  // relative position of j within the target sentence
+        double prob_a_i = 1.0 / src.size();
+        double az = 0;  // normalizer: sum of the unnormalized weights for this j
+        for (int ta = 0; ta < src.size(); ++ta) {
+          unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension);  // weight decays with the gap between the relative positions ta/|src| and j/|trg|
+          az += unnormed_a_i[ta];
+        }
+        for (int i = 1; i <= src.size(); ++i) {
+          prob_a_i = unnormed_a_i[i-1] / az;  // normalized weight; currently consumed only by the commented-out expression on the next line
+          probs[i] = 1; // tt.prob(src[i-1][0].label, f_j) * prob_a_i;
+          sum += probs[i];
+        }
+      }
+    }
+    // likelihood is never updated by the loop above in this stub, so it is still 0 when reported below.
+    // log(e) = 1.0
+    double base2_likelihood = likelihood / log(2);  // convert from natural log to log base 2
+    // Finish the line of progress dots, if one is pending.
+    if (flag) { cerr << endl; }
+    if (iter == 0) {
+      mean_srclen_multiplier = tot_len_ratio / lc;  // mean of the per-line trg/src length ratios from the first pass
+      cerr << "expected target length = source length * " << mean_srclen_multiplier << endl;
+    }
+    cerr << "  log_e likelihood: " << likelihood << endl;
+    cerr << "  log_2 likelihood: " << base2_likelihood << endl;
+    cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
+    cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+  }
+  return 0;
+}
+
+#endif
+
-- 
cgit v1.2.3


From 6704c23f34940dde3951155fd77246bb6229ba95 Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Tue, 21 Feb 2012 17:51:44 -0500
Subject: use lbfgs

---
 training/Makefile.am  |  2 +-
 training/lbl_model.cc | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 29 insertions(+), 6 deletions(-)

(limited to 'training/Makefile.am')

diff --git a/training/Makefile.am b/training/Makefile.am
index 330341ac..991ac210 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -50,7 +50,7 @@ test_ngram_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteva
 model1_SOURCES = model1.cc ttables.cc
 model1_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
-lbl_model_SOURCES = lbl_model.cc ttables.cc
+lbl_model_SOURCES = lbl_model.cc optimize.cc
 lbl_model_LDADD = $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/utils/libutils.a -lz
 
 grammar_convert_SOURCES = grammar_convert.cc
diff --git a/training/lbl_model.cc b/training/lbl_model.cc
index 4759eedc..eb3e194d 100644
--- a/training/lbl_model.cc
+++ b/training/lbl_model.cc
@@ -16,6 +16,7 @@
 #include <boost/program_options/variables_map.hpp>
 #include <Eigen/Dense>
 
+#include "optimize.h"
 #include "array2d.h"
 #include "m.h"
 #include "lattice.h"
@@ -26,7 +27,7 @@
 namespace po = boost::program_options;
 using namespace std;
 
-#define kDIMENSIONS 25
+#define kDIMENSIONS 8
 typedef Eigen::Matrix<float, kDIMENSIONS, 1> RVector;
 typedef Eigen::Matrix<float, 1, kDIMENSIONS> RTVector;
 typedef Eigen::Matrix<float, kDIMENSIONS, kDIMENSIONS> TMatrix;
@@ -69,6 +70,21 @@ void Normalize(RVector* v) {
   *v /= norm;
 }
 
+void Flatten(const TMatrix& m, vector<double>* v) {  // copies m into *v row by row; *v is resized to kDIMENSIONS * kDIMENSIONS
+  unsigned c = 0;  // next write position in *v
+  v->resize(kDIMENSIONS * kDIMENSIONS);
+  for (unsigned i = 0; i < kDIMENSIONS; ++i)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+      (*v)[c++] = m(i,j);
+}
+
+void Unflatten(const vector<double>& v, TMatrix* m) {  // fills *m row by row from v, the same element order Flatten writes
+  unsigned c = 0;  // next read position in v
+  for (unsigned i = 0; i < kDIMENSIONS; ++i)
+    for (unsigned j = 0; j < kDIMENSIONS; ++j)
+      (*m)(i, j) = v[c++];  // NOTE(review): v.size() is not checked here; reads kDIMENSIONS * kDIMENSIONS elements
+}
+
 int main(int argc, char** argv) {
   po::variables_map conf;
   if (!InitCommandLine(argc, argv, &conf)) return 1;
@@ -76,7 +92,7 @@ int main(int argc, char** argv) {
   const int ITERATIONS = conf["iterations"].as<unsigned>();
   const float eta = conf["eta"].as<float>();
   const double diagonal_tension = conf["diagonal_tension"].as<double>();
-  bool SGD = true;
+  bool SGD = false;
   if (diagonal_tension < 0.0) {
     cerr << "Invalid value for diagonal_tension: must be >= 0\n";
     return 1;
@@ -121,6 +137,7 @@ int main(int argc, char** argv) {
   cerr << "Number of target word types: " << vocab_e.size() << endl;
   const float num_examples = lc;
 
+  LBFGSOptimizer lbfgs(kDIMENSIONS * kDIMENSIONS, 100);
   r_trg.resize(TD::NumWords() + 1);
   r_src.resize(TD::NumWords() + 1);
   if (conf.count("random_seed")) {
@@ -130,7 +147,7 @@ int main(int argc, char** argv) {
     cerr << "Random seed: " << seed << endl;
     srand(seed);
   }
-  TMatrix t = TMatrix::Random() / 100.0;
+  TMatrix t = TMatrix::Random() / 1024.0;
   for (unsigned i = 1; i < r_trg.size(); ++i) {
     r_trg[i] = RVector::Random();
     r_src[i] = RVector::Random();
@@ -145,6 +162,8 @@ int main(int argc, char** argv) {
   TMatrix g;
   vector<TMatrix> exp_src;
   vector<double> z_src;
+  vector<double> flat_g, flat_t;
+  Flatten(t, &flat_t);
   for (int iter = 0; iter < ITERATIONS; ++iter) {
     cerr << "ITERATION " << (iter + 1) << endl;
     ReadFile rf(fname);
@@ -236,8 +255,6 @@ int main(int argc, char** argv) {
         if (SGD) {
           t -= g * eta / num_examples;
           g *= 0;
-        } else {
-          assert(!"not implemented");
         }
       }
       
@@ -250,6 +267,12 @@ int main(int argc, char** argv) {
     cerr << "  log_2 likelihood: " << base2_likelihood << endl;
     cerr << "   cross entropy: " << (-base2_likelihood / denom) << endl;
     cerr << "      perplexity: " << pow(2.0, -base2_likelihood / denom) << endl;
+    if (!SGD) {
+      Flatten(g, &flat_g);
+      lbfgs.Optimize(-likelihood, flat_g, &flat_t);
+      Unflatten(flat_t, &t);
+      if (lbfgs.HasConverged()) break;
+    }
     cerr << t << endl;
   }
   cerr << "TRANSLATION MATRIX:" << endl << t << endl;
-- 
cgit v1.2.3