From 47aa8d94d3ddff39295966cee67ce884c98be8da Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 27 Jan 2012 14:49:08 -0500 Subject: rename vest to dpmert (dynamic programming mert), rename variables and types to correspond to standard geometric concepts --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index 131a1705..cd78ee72 100644 --- a/configure.ac +++ b/configure.ac @@ -113,4 +113,4 @@ then AM_CONDITIONAL([GLC], true) fi -AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile vest/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) +AC_OUTPUT(Makefile utils/Makefile mteval/Makefile extools/Makefile decoder/Makefile phrasinator/Makefile training/Makefile dpmert/Makefile pro-train/Makefile klm/util/Makefile klm/lm/Makefile mira/Makefile gi/pyp-topics/src/Makefile gi/clda/src/Makefile gi/pf/Makefile gi/markov_al/Makefile) -- cgit v1.2.3 From a38b3fa383412e56eb958db998662c026bc08f4b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 17 Feb 2012 13:01:54 -0500 Subject: boost version checking, check for Eigen, get rid of old digamma stuff --- configure.ac | 21 +++++++++++++++------ training/em_utils.h | 24 ------------------------ training/model1.cc | 1 - training/mr_em_adapted_reduce.cc | 6 +++--- training/ttables.h | 4 ++-- utils/m.h | 6 ++++++ 6 files changed, 26 insertions(+), 36 deletions(-) delete mode 100644 training/em_utils.h (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index cd78ee72..aa79027f 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,7 @@ esac AC_PROG_CC AC_PROG_CXX AC_LANG_CPLUSPLUS -BOOST_REQUIRE +BOOST_REQUIRE([1.44]) BOOST_PROGRAM_OPTIONS AC_ARG_ENABLE(mpi, [ --enable-mpi Build MPI binaries, assumes mpi.h is present ], @@ -38,7 +38,7 @@ then CPPFLAGS="$CPPFLAGS -I${with_cmph}/include" AC_CHECK_HEADER(cmph.h, - [AC_DEFINE([HAVE_CMPH], [], [flag for cmph perfect hashing library])], + [AC_DEFINE([HAVE_CMPH], [1], [flag for cmph perfect hashing library])], [AC_MSG_ERROR([Cannot find cmph library!])]) LDFLAGS="$LDFLAGS -L${with_cmph}/lib" @@ -46,6 +46,18 @@ then AM_CONDITIONAL([HAVE_CMPH], true) fi +if test "x$with_eigen" != 'xno' +then + SAVE_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS -I${with_eigen}" + + AC_CHECK_HEADER(Eigen, + [AC_DEFINE([HAVE_EIGEN], [1], [flag for Eigen linear algebra library])], + [AC_MSG_ERROR([Cannot find Eigen!])]) + + AM_CONDITIONAL([HAVE_EIGEN], true) +fi + #BOOST_THREADS CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS" @@ -53,11 +65,8 @@ LDFLAGS="$LDFLAGS $BOOST_PROGRAM_OPTIONS_LDFLAGS" LIBS="$LIBS $BOOST_PROGRAM_OPTIONS_LIBS" # $BOOST_THREAD_LIBS" -AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp, - [AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])]) - AC_CHECK_HEADER(google/dense_hash_map, - [AC_DEFINE([HAVE_SPARSEHASH], [], [flag for google::dense_hash_map])]) + [AC_DEFINE([HAVE_SPARSEHASH], [1], [flag for google::dense_hash_map])]) AC_PROG_INSTALL GTEST_LIB_CHECK(1.0) diff --git a/training/em_utils.h b/training/em_utils.h deleted file mode 100644 index 37762978..00000000 --- a/training/em_utils.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef _EM_UTILS_H_ -#define _EM_UTILS_H_ - -#include "config.h" -#ifdef HAVE_BOOST_DIGAMMA -#include -using boost::math::digamma; -#else -#warning Using Mark Johnsons digamma() -#include -inline double digamma(double x) { - double result = 0, xx, xx2, xx4; - assert(x > 0); - for ( ; x < 7; ++x) - result -= 1/x; - x -= 1.0/2.0; - xx = 1.0/x; - xx2 = xx*xx; - xx4 = xx2*xx2; - result += log(x)+(1./24.)*xx2-(7.0/960.0)*xx4+(31.0/8064.0)*xx4*xx2-(127.0/30720.0)*xx4*xx4; - return result; -} -#endif -#endif diff --git a/training/model1.cc b/training/model1.cc index 40249aa3..a87d388f 100644 --- a/training/model1.cc +++ b/training/model1.cc @@ -9,7 +9,6 @@ #include "filelib.h" #include "ttables.h" #include "tdict.h" -#include "em_utils.h" namespace po = boost::program_options; using namespace std; diff --git a/training/mr_em_adapted_reduce.cc b/training/mr_em_adapted_reduce.cc index d4c16a2f..f65b5440 100644 --- a/training/mr_em_adapted_reduce.cc +++ b/training/mr_em_adapted_reduce.cc @@ -10,7 +10,7 @@ #include "fdict.h" #include "weights.h" #include "sparse_vector.h" -#include "em_utils.h" +#include "m.h" using namespace std; namespace po = boost::program_options; @@ -63,11 +63,11 @@ void Maximize(const bool use_vb, assert(tot > 0.0); double ltot = log(tot); if (use_vb) - ltot = digamma(tot + total_event_types * alpha); + ltot = Md::digamma(tot + total_event_types * alpha); for (SparseVector::const_iterator it = counts.begin(); it != counts.end(); ++it) { if (use_vb) { - pc->set_value(it->first, NoZero(digamma(it->second + alpha) - ltot)); + pc->set_value(it->first, NoZero(Md::digamma(it->second + alpha) - ltot)); } else { pc->set_value(it->first, NoZero(log(it->second) - ltot)); } diff --git a/training/ttables.h b/training/ttables.h index 50d85a68..bf3351d2 100644 --- a/training/ttables.h +++ b/training/ttables.h @@ -4,9 +4,9 @@ #include #include +#include "m.h" #include "wordid.h" #include "tdict.h" -#include "em_utils.h" class TTable { public: @@ -39,7 +39,7 @@ class TTable { for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) tot += it->second + alpha; for (Word2Double::iterator it = cpd.begin(); it != cpd.end(); ++it) - it->second = exp(digamma(it->second + alpha) - digamma(tot)); + it->second = exp(Md::digamma(it->second + alpha) - Md::digamma(tot)); } counts.clear(); } diff --git a/utils/m.h b/utils/m.h index b25248c2..5e45efee 100644 --- a/utils/m.h +++ b/utils/m.h @@ -3,6 +3,7 @@ #include #include +#include template struct M { @@ -81,6 +82,11 @@ struct M { } } + // digamma is the first derivative of the log-gamma function + static inline F digamma(const F& x) { + return boost::math::digamma(x); + } + }; typedef M Md; -- cgit v1.2.3 From d3ccf26cf501cb15ed300bc0ad17596a4e59fbeb Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 18 Feb 2012 15:16:17 -0500 Subject: fix diagonal model --- configure.ac | 2 +- training/model1.cc | 29 +++++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index aa79027f..026dad01 100644 --- a/configure.ac +++ b/configure.ac @@ -51,7 +51,7 @@ then SAVE_CPPFLAGS="$CPPFLAGS" CPPFLAGS="$CPPFLAGS -I${with_eigen}" - AC_CHECK_HEADER(Eigen, + AC_CHECK_HEADER(Eigen/Dense, [AC_DEFINE([HAVE_EIGEN], [1], [flag for Eigen linear algebra library])], [AC_MSG_ERROR([Cannot find Eigen!])]) diff --git a/training/model1.cc b/training/model1.cc index a87d388f..73104304 100644 --- a/training/model1.cc +++ b/training/model1.cc @@ -4,6 +4,7 @@ #include #include +#include "m.h" #include "lattice.h" #include "stringlib.h" #include "filelib.h" @@ -13,11 +14,6 @@ namespace po = boost::program_options; using namespace std; -inline double log_poisson(unsigned x, const double& lambda) { - assert(lambda > 0.0); - return log(lambda) * x - lgamma(x + 1) - lambda; -} - bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() @@ -82,6 +78,7 @@ int main(int argc, char** argv) { TTable::Word2Word2Double was_viterbi; double tot_len_ratio = 0; double mean_srclen_multiplier = 0; + vector unnormed_a_i; for (int iter = 0; iter < ITERATIONS; ++iter) { const bool final_iteration = (iter == (ITERATIONS - 1)); cerr << "ITERATION " << (iter + 1) << (final_iteration ? " (FINAL)" : "") << endl; @@ -108,6 +105,8 @@ int main(int argc, char** argv) { assert(src.size() > 0); assert(trg.size() > 0); } + if (src.size() > unnormed_a_i.size()) + unnormed_a_i.resize(src.size()); if (iter == 0) tot_len_ratio += static_cast(trg.size()) / static_cast(src.size()); denom += trg.size(); @@ -125,13 +124,15 @@ int main(int argc, char** argv) { } double az = 0; if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) - az += exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + for (int ta = 0; ta < src.size(); ++ta) { + unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + az += unnormed_a_i[ta]; + } az /= prob_align_not_null; } for (int i = 1; i <= src.size(); ++i) { if (favor_diagonal) - prob_a_i = exp(-fabs(double(i) / src.size() - j_over_ts) * diagonal_tension) / az; + prob_a_i = unnormed_a_i[i-1] / az; probs[i] = tt.prob(src[i-1][0].label, f_j) * prob_a_i; sum += probs[i]; } @@ -202,7 +203,9 @@ int main(int argc, char** argv) { Lattice src, trg; LatticeTools::ConvertTextToLattice(ssrc, &src); LatticeTools::ConvertTextToLattice(strg, &trg); - double log_prob = log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier); + double log_prob = Md::log_poisson(trg.size(), 0.05 + src.size() * mean_srclen_multiplier); + if (src.size() > unnormed_a_i.size()) + unnormed_a_i.resize(src.size()); // compute likelihood for (int j = 0; j < trg.size(); ++j) { @@ -216,13 +219,15 @@ int main(int argc, char** argv) { } double az = 0; if (favor_diagonal) { - for (int ta = 0; ta < src.size(); ++ta) - az += exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + for (int ta = 0; ta < src.size(); ++ta) { + unnormed_a_i[ta] = exp(-fabs(double(ta) / src.size() - j_over_ts) * diagonal_tension); + az += unnormed_a_i[ta]; + } az /= prob_align_not_null; } for (int i = 1; i <= src.size(); ++i) { if (favor_diagonal) - prob_a_i = exp(-fabs(double(i) / src.size() - j_over_ts) * diagonal_tension) / az; + prob_a_i = unnormed_a_i[i-1] / az; sum += tt.prob(src[i-1][0].label, f_j) * prob_a_i; } log_prob += log(sum); -- cgit v1.2.3 From 63d14a96b62163ea1cb1b42a5af009ebb82a1e4d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 18 Feb 2012 20:22:42 +0000 Subject: fix eigen option --- configure.ac | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'configure.ac') diff --git a/configure.ac b/configure.ac index 026dad01..869f7cf9 100644 --- a/configure.ac +++ b/configure.ac @@ -46,6 +46,13 @@ then AM_CONDITIONAL([HAVE_CMPH], true) fi +AM_CONDITIONAL([HAVE_EIGEN], false) +AC_ARG_WITH(eigen, + [AC_HELP_STRING([--with-eigen=PATH], [(optional) path to Eigen linear algebra library])], + [with_eigen=$withval], + [with_eigen=no] + ) + if test "x$with_eigen" != 'xno' then SAVE_CPPFLAGS="$CPPFLAGS" -- cgit v1.2.3