From 3dfa575d202c9277060bc43a7af9351702da9f12 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 09:05:04 -0500 Subject: fix my dumb bug that killed qsub functionality --- vest/dist-vest.pl | 2 -- vest/parallelize.pl | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'vest') diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 973a29ef..f6f661b9 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -194,7 +194,6 @@ sub modbin { my $src=$$_; $$_="$bindir/".basename($src); check_call("cp -p $src $$_"); - die "cp $src $$_ failed: $!" unless $? == 0; } } sub dirsize { @@ -374,7 +373,6 @@ while (1){ $nmappers++; my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; my $jobid = check_output("$qcmd"); - die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0; chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; diff --git a/vest/parallelize.pl b/vest/parallelize.pl index 47b77c79..2798a303 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -82,7 +82,7 @@ sub preview_files { my @f=grep { ! ($skipempty && -z $_) } @$l; my $fn=join(' ',map {escape_shell($_)} @f); my $cmd="tail -n $n $fn"; - check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); + unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); } sub prefix_dirname($) { #like `dirname but if ends in / then return the whole thing @@ -323,7 +323,7 @@ sub launch_job { } if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } - my $cleanfn=check_output("qdel $jobid 2> /dev/null"); + my $cleanfn="qdel $jobid 2> /dev/null"; push(@cleanup_cmds, $cleanfn); } close QOUT; -- cgit v1.2.3 From 13b15df6a00137395eae03ba3f33a987a916257b Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:00:53 -0500 Subject: another dumb bug involving cleanup being executed preemptively --- vest/dist-vest.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'vest') diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index f6f661b9..c27af804 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -371,12 +371,12 @@ while (1){ if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } $nmappers++; - my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; my $jobid = check_output("$qcmd"); chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null")); + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); print STDERR " $jobid"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } -- cgit v1.2.3 From 25d5729b850d1dc62eaf151b5550bd83963b08e8 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:09:18 -0500 Subject: dont die when there are no running jobs --- vest/dist-vest.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'vest') diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index c27af804..cfddf61c 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,4 +1,4 @@ -#!/usr/bin/env perl +#grep!/usr/bin/env perl use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); @@ -396,7 +396,7 @@ while (1){ print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '"))); + my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | awk '{if($0 !~ \" C \"){print}}'"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; -- cgit v1.2.3 From db200aeefcfad33e789a8790961ef5c0f66d8ba3 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:13:20 -0500 Subject: fail --- vest/dist-vest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vest') diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index cfddf61c..6a5959dc 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,4 +1,4 @@ -#grep!/usr/bin/env perl +#!/usr/bin/env perl use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); -- cgit v1.2.3 From 92ca6e23b39043ad026c07a5aab71ffc750c1db2 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:22:31 -0500 Subject: just use grep and dont check return code --- vest/dist-vest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vest') diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 6a5959dc..f95754dc 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -396,7 +396,7 @@ while (1){ print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | awk '{if($0 !~ \" C \"){print}}'"))); + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; -- cgit v1.2.3 From 6b25a85dd45af5982e07577b33c64e3b577579c3 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 10:27:43 -0500 Subject: dont fail on possibly temporary qstat errors --- vest/parallelize.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'vest') diff --git a/vest/parallelize.pl b/vest/parallelize.pl index 2798a303..c2526503 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -283,7 +283,8 @@ sub numof_live_jobs { if ($use_fork) { die "not implemented"; } else { - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat"))); + # We can probably continue decoding if the qstat error is only temporary + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat"))); return ($#livejobs + 1); } } -- cgit v1.2.3 From eda8d83cd957463d32980da7c60085a820f7eae0 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Fri, 11 Mar 2011 11:06:48 -0500 Subject: be more verbose when running each child decoder process when forking. also, avoid some non-bash errors --- vest/parallelize.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'vest') diff --git a/vest/parallelize.pl b/vest/parallelize.pl index c2526503..b4783f91 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -347,7 +347,7 @@ sub launch_job_fork { my ($fh, $scr_name) = get_temp_script(); print $fh $script; close $fh; - my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile"; + my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile"; print STDERR "EXEC: $todo\n"; my $out = check_output("$todo"); print STDERR "RES: $out\n"; -- cgit v1.2.3 From dccf47501f078a354375b9f3edd481d8c8d30268 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Mon, 14 Mar 2011 17:03:51 -0400 Subject: more paranoid checking when (idiot/time-crunched) user tries to define his own tags during tuning --- vest/dist-vest.pl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'vest') diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index f95754dc..d17d7de1 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -573,7 +573,11 @@ sub enseg { while (my $line=){ chomp $line; if ($line =~ /^\s* tags, you must include a zero-based id attribute"; + } } else { print NEWSRC "$line\n"; } -- cgit v1.2.3