From 3dfa575d202c9277060bc43a7af9351702da9f12 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 09:05:04 -0500 Subject: fix my dumb bug that killed qsub functionality --- vest/dist-vest.pl | 2 -- vest/parallelize.pl | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 973a29ef..f6f661b9 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -194,7 +194,6 @@ sub modbin { my $src=$$_; $$_="$bindir/".basename($src); check_call("cp -p $src $$_"); - die "cp $src $$_ failed: $!" unless $? == 0; } } sub dirsize { @@ -374,7 +373,6 @@ while (1){ $nmappers++; my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; my $jobid = check_output("$qcmd"); - die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0; chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; diff --git a/vest/parallelize.pl b/vest/parallelize.pl index 47b77c79..2798a303 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -82,7 +82,7 @@ sub preview_files { my @f=grep { ! ($skipempty && -z $_) } @$l; my $fn=join(' ',map {escape_shell($_)} @f); my $cmd="tail -n $n $fn"; - check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); + unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); } sub prefix_dirname($) { #like `dirname but if ends in / then return the whole thing @@ -323,7 +323,7 @@ sub launch_job { } if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . 
$jobid; } - my $cleanfn=check_output("qdel $jobid 2> /dev/null"); + my $cleanfn="qdel $jobid 2> /dev/null"; push(@cleanup_cmds, $cleanfn); } close QOUT; -- cgit v1.2.3 From 13b15df6a00137395eae03ba3f33a987a916257b Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:00:53 -0500 Subject: another dumb bug involving cleanup being executed preemptively --- vest/dist-vest.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index f6f661b9..c27af804 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -371,12 +371,12 @@ while (1){ if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } $nmappers++; - my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; my $jobid = check_output("$qcmd"); chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null")); + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); print STDERR " $jobid"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . 
$jobid; } -- cgit v1.2.3 From 702df29b83ca10998ea3a8f84bc2e0e6c9e86eea Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:01:09 -0500 Subject: ignore emacs temp files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3892891c..2a287bbc 100644 --- a/.gitignore +++ b/.gitignore @@ -120,3 +120,4 @@ gi/posterior-regularisation/prjava/lib/prjava-20100715.jar *.dvi *.ps *.toc +*~ \ No newline at end of file -- cgit v1.2.3 From 25d5729b850d1dc62eaf151b5550bd83963b08e8 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:09:18 -0500 Subject: dont die when there are no running jobs --- vest/dist-vest.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index c27af804..cfddf61c 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,4 +1,4 @@ -#!/usr/bin/env perl +#grep!/usr/bin/env perl use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); @@ -396,7 +396,7 @@ while (1){ print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '"))); + my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | awk '{if($0 !~ \" C \"){print}}'"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; -- cgit v1.2.3 From db200aeefcfad33e789a8790961ef5c0f66d8ba3 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:13:20 -0500 Subject: fail --- vest/dist-vest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index cfddf61c..6a5959dc 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,4 +1,4 @@ -#grep!/usr/bin/env perl +#!/usr/bin/env perl use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); -- cgit v1.2.3 From 92ca6e23b39043ad026c07a5aab71ffc750c1db2 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:22:31 -0500 
Subject: just use grep and dont check return code --- vest/dist-vest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 6a5959dc..f95754dc 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -396,7 +396,7 @@ while (1){ print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | awk '{if($0 !~ \" C \"){print}}'"))); + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; -- cgit v1.2.3 From 6b25a85dd45af5982e07577b33c64e3b577579c3 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:27:43 -0500 Subject: dont fail on possibly temporary qstat errors --- vest/parallelize.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vest/parallelize.pl b/vest/parallelize.pl index 2798a303..c2526503 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -283,7 +283,8 @@ sub numof_live_jobs { if ($use_fork) { die "not implemented"; } else { - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat"))); + # We can probably continue decoding if the qstat error is only temporary + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat"))); return ($#livejobs + 1); } } -- cgit v1.2.3 From eda8d83cd957463d32980da7c60085a820f7eae0 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 11:06:48 -0500 Subject: be more verbose when running each child decoder process when forking. 
also, avoid some non-bash errors --- vest/parallelize.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vest/parallelize.pl b/vest/parallelize.pl index c2526503..b4783f91 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -347,7 +347,7 @@ sub launch_job_fork { my ($fh, $scr_name) = get_temp_script(); print $fh $script; close $fh; - my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile"; + my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile"; print STDERR "EXEC: $todo\n"; my $out = check_output("$todo"); print STDERR "RES: $out\n"; -- cgit v1.2.3 From dccf47501f078a354375b9f3edd481d8c8d30268 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Mon, 14 Mar 2011 17:03:51 -0400 Subject: more paranoid checking when (idiot/time-crunched) user tries to define his own tags during tuning --- vest/dist-vest.pl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index f95754dc..d17d7de1 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -573,7 +573,11 @@ sub enseg { while (my $line=<SRC>){ chomp $line; if ($line =~ /^\s*<seg/i) { + if($line =~ /id="[0-9]+"/) { print NEWSRC "$line\n"; + } else { + die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute"; + } } else { print NEWSRC "<seg id=\"$i\">$line</seg>\n"; } -- cgit v1.2.3 From 5d0f3c6aa4e78aea09952a7a65f61d3c4dce0a0e Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Mon, 14 Mar 2011 17:05:14 -0400 Subject: Fix wordset to override features() so that we can safely use multiple instances of it --- decoder/ff_wordset.h | 1 + 1 file changed, 1 insertion(+) diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 00e1145b..643097ef 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -32,6 +32,7 @@ class WordSet : public FeatureFunction { ~WordSet() { } + Features features() const { return single_feature(fid_); } protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, -- cgit v1.2.3 From 237de3db6d5917707b745e3df7be42f2497e3783 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Mon, 14 Mar 2011
17:39:04 -0400 Subject: Get enough compiling with scons to finish off the emnlp paper --- SConstruct | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/SConstruct b/SConstruct index 1a7885bc..c21d85d5 100644 --- a/SConstruct +++ b/SConstruct @@ -11,6 +11,9 @@ AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', meta AddOption('--efence', dest='efence', action='store_true', help='use electric fence for debugging memory corruptions') +# TODO: Troll http://www.scons.org/wiki/SconsAutoconf +# for some initial autoconf-like steps + platform = ARGUMENTS.get('OS', Platform()) include = Split('decoder utils klm mteval .') env = Environment(PREFIX=GetOption('prefix'), @@ -45,7 +48,7 @@ if glc: srcs.append(glc+'/feature-factory.cc') srcs.append(glc+'/cdec/ff_glc.cc') -for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc']: +for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc', 'vest/*.cc']: srcs.extend([ file for file in Glob(pattern) if not 'test' in str(file) and 'build_binary.cc' not in str(file) @@ -53,6 +56,30 @@ for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mtev and 'mbr_kbest.cc' not in str(file) and 'sri.cc' not in str(file) and 'fast_score.cc' not in str(file) + and 'cdec.cc' not in str(file) + and 'mr_' not in str(file) ]) -env.Program(target='decoder/cdec', source=srcs) +print 'Found {0} source files'.format(len(srcs)) +def comb(cc, srcs): + x = [cc] + x.extend(srcs) + return x + +env.Program(target='decoder/cdec', source=comb('decoder/cdec.cc', srcs)) +# TODO: The various decoder tests +# TODO: extools +env.Program(target='klm/lm/build_binary', source=comb('klm/lm/build_binary.cc', srcs)) +# TODO: klm ngram_query and tests +env.Program(target='mteval/fast_score', source=comb('mteval/fast_score.cc', srcs)) +env.Program(target='mteval/mbr_kbest', source=comb('mteval/mbr_kbest.cc', srcs)) 
+#env.Program(target='mteval/scorer_test', source=comb('mteval/fast_score.cc', srcs)) +# TODO: phrasinator +# TODO: Various training binaries +env.Program(target='vest/sentserver', source=['vest/sentserver.c'], LINKFLAGS='-all-static') +env.Program(target='vest/sentclient', source=['vest/sentclient.c'], LINKFLAGS='-all-static') +env.Program(target='vest/mr_vest_generate_mapper_input', source=comb('vest/mr_vest_generate_mapper_input.cc', srcs)) +env.Program(target='vest/mr_vest_map', source=comb('vest/mr_vest_map.cc', srcs)) +env.Program(target='vest/mr_vest_reduce', source=comb('vest/mr_vest_reduce.cc', srcs)) +#env.Program(target='vest/lo_test', source=comb('vest/lo_test.cc', srcs)) +# TODO: util tests -- cgit v1.2.3