From e90d7179326d467d5b183645692be71ebfa9e070 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Tue, 3 May 2011 12:06:31 -0400 Subject: Update scons build --- SConstruct | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/SConstruct b/SConstruct index 41c60178..93d740b7 100644 --- a/SConstruct +++ b/SConstruct @@ -2,20 +2,21 @@ # EXPERIMENTAL and HACKY version of cdec build in scons +# TODO: Persist these so that you don't have to specify flags every time +# http://www.scons.org/wiki/SavingVariablesToAFile AddOption('--prefix', dest='prefix', type='string', nargs=1, action='store', metavar='DIR', help='installation prefix') AddOption('--with-boost', dest='boost', type='string', nargs=1, action='store', metavar='DIR', help='boost installation directory (if in a non-standard location)') AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', metavar='DIR', help='path to Global Lexical Coherence package (optional)') +AddOption('--with-mpi', dest='mpi', action='store_true', + help='build tools that use Message Passing Interface? (optional)') AddOption('--efence', dest='efence', action='store_true', help='use electric fence for debugging memory corruptions') -# TODO: Troll http://www.scons.org/wiki/SconsAutoconf -# for some initial autoconf-like steps - platform = ARGUMENTS.get('OS', Platform()) -include = Split('decoder utils klm mteval .') +include = Split('decoder utils klm mteval training .') env = Environment(PREFIX=GetOption('prefix'), PLATFORM = platform, # BINDIR = bin, @@ -24,7 +25,19 @@ env = Environment(PREFIX=GetOption('prefix'), CPPPATH = include, LIBPATH = [], LIBS = Split('boost_program_options boost_serialization boost_thread z'), - CCFLAGS=Split('-g -O3')) + CCFLAGS=Split('-g -O3 -DHAVE_SCONS')) + + +# Do some autoconf-like sanity checks (http://www.scons.org/wiki/SconsAutoconf) +conf = Configure(env) +print('Checking if the environment is sane...') +if not conf.CheckCXX(): + print('!! Your compiler and/or environment is not correctly configured.') + Exit(1) +if not conf.CheckFunc('printf'): + print('!! Your compiler and/or environment is not correctly configured.') + Exit(1) +#env = conf.Finish() boost = GetOption('boost') if boost: @@ -33,9 +46,25 @@ if boost: CPPPATH=boost+'/include', LIBPATH=boost+'/lib') +if not conf.CheckLib('boost_program_options'): + print "Boost library 'boost_program_options' not found" + Exit(1) +#if not conf.CheckHeader('boost/math/special_functions/digamma.hpp'): +# print "Boost header 'digamma.hpp' not found" +# Exit(1) + +mpi = GetOption('mpi') +if mpi: + if not conf.CheckHeader('mpi.h'): + print "MPI header 'mpi.h' not found" + Exit(1) + if GetOption('efence'): env.Append(LIBS=Split('efence Segfault')) +print('Environment is sane.') +print + srcs = [] # TODO: Get rid of config.h @@ -61,6 +90,7 @@ for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mtev and 'fast_score.cc' not in str(file) and 'cdec.cc' not in str(file) and 'mr_' not in str(file) + and 'utils/ts.cc' != str(file) ]) print 'Found {0} source files'.format(len(srcs)) @@ -78,7 +108,19 @@ env.Program(target='mteval/fast_score', source=comb('mteval/fast_score.cc', srcs env.Program(target='mteval/mbr_kbest', source=comb('mteval/mbr_kbest.cc', srcs)) #env.Program(target='mteval/scorer_test', source=comb('mteval/fast_score.cc', srcs)) # TODO: phrasinator + # TODO: Various training binaries +env.Program(target='training/model1', source=comb('training/model1.cc', srcs)) +env.Program(target='training/augment_grammar', source=comb('training/augment_grammar.cc', srcs)) +env.Program(target='training/grammar_convert', source=comb('training/grammar_convert.cc', srcs)) +#env.Program(target='training/optimize_test', source=comb('training/optimize_test.cc', srcs)) +env.Program(target='training/collapse_weights', source=comb('training/collapse_weights.cc', srcs)) +#env.Program(target='training/lbfgs_test', source=comb('training/lbfgs_test.cc', srcs)) +#env.Program(target='training/mr_optimize_reduce', source=comb('training/mr_optimize_reduce.cc', srcs)) +env.Program(target='training/mr_em_map_adapter', source=comb('training/mr_em_map_adapter.cc', srcs)) +env.Program(target='training/mr_reduce_to_weights', source=comb('training/mr_reduce_to_weights.cc', srcs)) +env.Program(target='training/mr_em_adapted_reduce', source=comb('training/mr_em_adapted_reduce.cc', srcs)) + env.Program(target='vest/sentserver', source=['vest/sentserver.c'], LINKFLAGS='-all-static') env.Program(target='vest/sentclient', source=['vest/sentclient.c'], LINKFLAGS='-all-static') env.Program(target='vest/mr_vest_generate_mapper_input', source=comb('vest/mr_vest_generate_mapper_input.cc', srcs)) @@ -86,3 +128,10 @@ env.Program(target='vest/mr_vest_map', source=comb('vest/mr_vest_map.cc', srcs)) env.Program(target='vest/mr_vest_reduce', source=comb('vest/mr_vest_reduce.cc', srcs)) #env.Program(target='vest/lo_test', source=comb('vest/lo_test.cc', srcs)) # TODO: util tests + +if mpi: + env.Program(target='training/mpi_online_optimize', source=comb('training/mpi_online_optimize.cc', srcs)) + env.Program(target='training/mpi_batch_optimize', source=comb('training/mpi_batch_optimize.cc', srcs)) + env.Program(target='training/compute_cllh', source=comb('training/compute_cllh.cc', srcs)) + env.Program(target='training/cllh_filter_grammar', source=comb('training/cllh_filter_grammar.cc', srcs)) + -- cgit v1.2.3 From 148c0d57317e097f94a1562452bc50cdc23408dc Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Tue, 3 May 2011 12:13:21 -0400 Subject: Fix cross entropy and perplexity to use base2 log throughout instead of log_e then 2^x --- training/model1.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/training/model1.cc b/training/model1.cc index 4023735c..b9590ece 100644 --- a/training/model1.cc +++ b/training/model1.cc @@ -129,10 +129,15 @@ int main(int argc, char** argv) { likelihood += log(sum) + src_logprob; } } + + // log(e) = 1.0 + double base2_likelihood = likelihood / log(2); + if (flag) { cerr << endl; } - cerr << " log likelihood: " << likelihood << endl; - cerr << " cross entropy: " << (-likelihood / denom) << endl; - cerr << " perplexity: " << pow(2.0, -likelihood / denom) << endl; + cerr << " log_e likelihood: " << likelihood << endl; + cerr << " log_2 likelihood: " << base2_likelihood << endl; + cerr << " cross entropy: " << (-base2_likelihood / denom) << endl; + cerr << " perplexity: " << pow(2.0, -base2_likelihood / denom) << endl; if (!final_iteration) { if (variational_bayes) tt.NormalizeVB(alpha); -- cgit v1.2.3