From 8c8ff6c6915ebc5ce30156e3f05bf8d1966ec0a1 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 06:54:31 -0500 Subject: glc hacking --- decoder/Makefile.am | 2 +- environment/LocalConfig.pm | 2 +- klm/util/string_piece.hh | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 9cf4c3c4..f43e6894 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -82,5 +82,5 @@ libcdec_a_SOURCES = \ if GLC # Until we build GLC as a library... - libcdec_a_SOURCES += ff_glc.cc + libcdec_a_SOURCES += ff_glc.cc feature-factory.cc string_util.cc endif diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 10933f36..f365319c 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -33,7 +33,7 @@ my $CCONFIG = { 'HOST_REGEXP' => qr/^(thor|tyr)\.inf\.ed\.ac\.uk$/, }, 'Blacklight' => { - 'HOST_REGEXP' => qr/^(blacklight.psc.edu|bl1.psc.teragrid.org|bl0.psc.teragrid.org)$/, + 'HOST_REGEXP' => qr/^(tg-login1.blacklight.psc.teragrid.org|blacklight.psc.edu|bl1.psc.teragrid.org|bl0.psc.teragrid.org)$/, 'QSubMemFlag' => '-l pmem=', }, 'LOCAL' => { diff --git a/klm/util/string_piece.hh b/klm/util/string_piece.hh index e48ce3d9..2583db5e 100644 --- a/klm/util/string_piece.hh +++ b/klm/util/string_piece.hh @@ -48,7 +48,10 @@ #ifndef BASE_STRING_PIECE_H__ #define BASE_STRING_PIECE_H__ -#include "util/have.hh" +//Uncomment this line if you use ICU in your code. +//#define HAVE_ICU +//Uncomment this line if you want boost hashing for your StringPieces. +//#define HAVE_BOOST #ifdef HAVE_BOOST #include -- cgit v1.2.3 From 70d909f695fdb8207ce251bae9e860c3787d7711 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 11:26:30 -0500 Subject: use non-terrible build system --- SConstruct | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 SConstruct diff --git a/SConstruct b/SConstruct new file mode 100644 index 00000000..dc5497ae --- /dev/null +++ b/SConstruct @@ -0,0 +1,51 @@ +AddOption('--prefix', + dest='prefix', + type='string', + nargs=1, + action='store', + metavar='DIR', + help='installation prefix') + +AddOption('--with-boost', + dest='boost', + type='string', + nargs=1, + action='store', + metavar='DIR', + help='boost installation directory (if in a non-standard location)') + +platform = ARGUMENTS.get('OS', Platform()) + +srcs = [] +for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc']: + srcs.extend([ file for file in Glob(pattern) + if not 'test' in str(file) + and 'build_binary.cc' not in str(file) + and 'ngram_query.cc' not in str(file) + and 'mbr_kbest.cc' not in str(file) + and 'sri.cc' not in str(file) + and 'fast_score.cc' not in str(file) + ]) + +include = Split('decoder utils klm mteval .') +libPaths = [] + +boost = GetOption('boost') +if boost: + include.append(boost+'/include') + libPaths.append(boost+'/lib') + +glcDir = None +glcDir = '../GlobalLexicalCoherence' +if glcDir: + include.append(glcDir) + +env = Environment(PREFIX=GetOption('prefix'), + PLATFORM = platform, +# BINDIR = bin, + INCDIR = include, +# LIBDIR = lib, + CPPPATH = [include, '.'], + LIBPATH = libPaths, + LIBS = Split('boost_program_options boost_serialization boost_thread z')) +env.Program(target='decoder/cdec', source=srcs) -- cgit v1.2.3 From 159c889f8c65af4a97af1ced35ccedea34600fdf Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 24 Mar 2011 09:59:58 -0400 Subject: Add some new files to the GLC build --- SConstruct | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/SConstruct b/SConstruct index c21d85d5..41c60178 100644 --- a/SConstruct +++ b/SConstruct @@ -29,7 +29,8 @@ env = Environment(PREFIX=GetOption('prefix'), boost = GetOption('boost') if boost: print 'Using Boost at {0}'.format(boost) - env.Append(CPPPATH=boost+'/include', + env.Append(CCFLAGS='-DHAVE_BOOST', + CPPPATH=boost+'/include', LIBPATH=boost+'/lib') if GetOption('efence'): @@ -45,6 +46,8 @@ if glc: env.Append(CCFLAGS='-DHAVE_GLC', CPPPATH=[glc, glc+'/cdec']) srcs.append(glc+'/string_util.cc') + srcs.append(glc+'/sys_util.cc') + srcs.append(glc+'/debug.cc') srcs.append(glc+'/feature-factory.cc') srcs.append(glc+'/cdec/ff_glc.cc') -- cgit v1.2.3 From 9fcd3fdbfde418c8347603b216b182d04db28516 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 24 Mar 2011 20:54:45 -0400 Subject: Be more paranoid and check count of topbest sents, too. Also, provide path to decoder logs when we do fail. --- vest/dist-vest.pl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 80d2471e..789b5b14 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -289,9 +289,23 @@ while (1){ my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; print STDERR "COMMAND:\n$cmd\n"; check_bash_call($cmd); - my $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); - print STDERR "NUMBER OF HGs: $num_hgs\n"; - die "Dev set contains $devSize sentences! Decoder failure?\n" if ($devSize != $num_hgs); + my $num_hgs; + my $num_topbest; + my $retries = 0; + while($retries < 5) { + $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); + $num_topbest = check_output("wc -l < $runFile"); + print STDERR "NUMBER OF HGs: $num_hgs\n"; + print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n"; + if($devSize == $num_hgs && $devSize == $num_topbest) { + last; + } else { + print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n"; + sleep(3); + } + $retries++; + } + die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest); my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; -- cgit v1.2.3