diff options
author | Jonathan Clark <jon.h.clark@gmail.com> | 2011-03-10 17:09:21 -0500 |
---|---|---|
committer | Jonathan Clark <jon.h.clark@gmail.com> | 2011-03-10 17:09:21 -0500 |
commit | 9f821dd7f08da5a146e14863b3d49ae16e6739f4 (patch) | |
tree | 0a6fff3b9b326566cafb1cd1885862e4f9f1a3ec /vest/dist-vest.pl | |
parent | 4f9933d668d247ea5831c3f2af0b996a94da28f7 (diff) |
initial version of scons
Diffstat (limited to 'vest/dist-vest.pl')
-rwxr-xr-x | vest/dist-vest.pl | 112 |
1 files changed, 46 insertions, 66 deletions
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 2a56dd55..973a29ef 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,16 +1,18 @@ #!/usr/bin/env perl - use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode use LocalConfig; use Getopt::Long; use IPC::Open2; -use strict; use POSIX ":sys_wait_h"; my $QSUB_CMD = qsub_args(mert_memory()); +require "libcall.pl"; + # Default settings my $srcFile; my $refFiles; @@ -22,6 +24,7 @@ my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input"; my $MAPPER = "$bin_dir/mr_vest_map"; my $REDUCER = "$bin_dir/mr_vest_reduce"; my $parallelize = "$bin_dir/parallelize.pl"; +my $libcall = "$bin_dir/libcall.pl"; my $sentserver = "$bin_dir/sentserver"; my $sentclient = "$bin_dir/sentclient"; my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; @@ -31,6 +34,7 @@ die "Can't find $MAPPER" unless -x $MAPPER; my $cdec = "$bin_dir/../decoder/cdec"; die "Can't find decoder in $cdec" unless -x $cdec; die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; my $decoder = $cdec; my $lines_per_mapper = 400; my $rand_directions = 15; @@ -124,7 +128,7 @@ sub enseg; sub print_help; my $nodelist; -my $host =`hostname`; chomp $host; +my $host =check_output("hostname"); chomp $host; my $bleu; my $interval_count = 0; my $logfile; @@ -142,7 +146,7 @@ unless ($dir){ $dir = "vest"; } unless ($dir =~ /^\//){ # convert relative path to absolute path - my $basedir = `pwd`; + my $basedir = check_output("pwd"); chomp $basedir; $dir = "$basedir/$dir"; } @@ -158,15 +162,18 @@ my @cleanupcmds = (); sub cleanup { print STDERR "Cleanup...\n"; - for my $pid (@childpids){ `kill $pid`; } - for my $cmd (@cleanupcmds){`$cmd`; } + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } exit 1; }; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; $SIG{INT} = "cleanup"; $SIG{TERM} = "cleanup"; $SIG{HUP} = "cleanup"; -my $decoderBase = `basename $decoder`; chomp $decoderBase; +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; my $newIniFile = "$dir/$decoderBase.ini"; my $inputFileName = "$dir/input"; my $user = $ENV{"USER"}; @@ -181,12 +188,12 @@ use File::Basename qw(basename); sub modbin { local $_; my $bindir=shift; - `mkdir -p $bindir`; + check_call("mkdir -p $bindir"); -d $bindir || die "couldn't make bindir $bindir"; for (@_) { my $src=$$_; $$_="$bindir/".basename($src); - `cp -p $src $$_`; + check_call("cp -p $src $$_"); die "cp $src $$_ failed: $!" unless $? == 0; } } @@ -203,7 +210,7 @@ if ($dryrun){ } else { -e $dir || mkdir $dir; mkdir "$dir/hgs"; - modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient) if $cpbin; + modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; mkdir "$dir/scripts"; my $cmdfile="$dir/rerun-vest.sh"; open CMD,'>',$cmdfile; @@ -219,7 +226,7 @@ if ($dryrun){ print_help(); exit; } - `cp $initialWeights $dir/weights.0`; + check_call("cp $initialWeights $dir/weights.0"); die "Can't find weights.0" unless (-e "$dir/weights.0"); } write_config(*STDERR); @@ -227,7 +234,7 @@ if ($dryrun){ # Generate initial files and values -`cp $iniFile $newIniFile`; +check_call("cp $iniFile $newIniFile"); $iniFile = $newIniFile; my $newsrc = "$dir/dev.input"; @@ -259,12 +266,12 @@ while (1){ my $logdir="$dir/logs.$iteration"; my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; my $scorerLog="$logdir/scorer.log.$iteration"; - `mkdir -p $logdir`; + check_call("mkdir -p $logdir"); #decode print STDERR "RUNNING DECODER AT "; - print STDERR `date`; + print STDERR unchecked_output("date"); my $im1 = $iteration - 1; my $weightsFile="$dir/weights.$im1"; my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; @@ -275,33 +282,28 @@ while (1){ if ($run_local) { $pcmd = "cat $srcFile |"; } elsif ($use_make) { - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $decode_nodes --"; + # TODO: Throw error when decode_nodes is specified along with use_make + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --"; } else { $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --"; } my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; print STDERR "COMMAND:\n$cmd\n"; - my $result = 0; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - print STDERR "ERROR: Parallel decoder returned non-zero exit code $result\n"; - die; - } - my $num_hgs = `ls $dir/hgs/*.gz | wc -l`; + check_bash_call($cmd); + my $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); print STDERR "NUMBER OF HGs: $num_hgs\n"; die "Dev set contains $devSize sentences! Decoder failure?\n" if ($devSize != $num_hgs); - my $dec_score = `cat $runFile | $SCORER $refs_comma_sep -l $metric`; + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; # save space - `gzip -f $runFile`; - `gzip -f $decoderLog`; + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); # run optimizer print STDERR "RUNNING OPTIMIZER AT "; - print STDERR `date`; + print STDERR unchecked_output("date"); my $mergeLog="$logdir/prune-merge.log.$iteration"; my $score = 0; @@ -309,28 +311,18 @@ while (1){ my $inweights="$dir/weights.$im1"; for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) { print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; - print STDERR `date`; + print STDERR unchecked_output("date"); $icc++; my $nop=$noprimary?"--no_primary":""; my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):""; my $bwargs=$bleu_weight!=1 ? "--bleu_weight=$bleu_weight":""; $cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter"; print STDERR "COMMAND:\n$cmd\n"; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - die "ERROR: mapinput command returned non-zero exit code $result\n"; - } - - `mkdir -p $dir/splag.$im1`; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput."; print STDERR "COMMAND:\n$cmd\n"; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - print STDERR "ERROR: split command returned non-zero exit code $result\n"; - die; - } + check_call($cmd); opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; my @shards = grep { /^mapinput\./ } readdir(DIR); closedir DIR; @@ -360,11 +352,7 @@ while (1){ my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput"; if ($run_local) { print STDERR "COMMAND:\n$script\n"; - $result = system($script); - unless ($result == 0){ - cleanup(); - die "ERROR: mapper returned non-zero exit code $result\n"; - } + check_bash_call($script); } elsif ($use_make) { my $script_file = "$dir/scripts/map.$shard"; open F, ">$script_file" or die "Can't write $script_file: $!"; @@ -384,13 +372,13 @@ while (1){ if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } $nmappers++; - my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; - my $jobid = `$qcmd`; + my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $jobid = check_output("$qcmd"); die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0; chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, "`qdel $jobid 2> /dev/null`"); + push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null")); print STDERR " $jobid"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } @@ -403,18 +391,14 @@ while (1){ close $mkfile; my $mcmd = "make -j $use_make -f $mkfilename"; print STDERR "\nExecuting: $mcmd\n"; - $result = system($mcmd); - unless ($result == 0){ - cleanup(); - die "ERROR: make command returned non-zero exit code $result\n"; - } + check_call($mcmd); } else { print STDERR "\nLaunched $nmappers mappers.\n"; sleep 8; print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, `qstat | grep -v ' C '`)); + my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; @@ -430,16 +414,12 @@ while (1){ } print STDERR "Results for $tol/$til lines\n"; print STDERR "\nSORTING AND RUNNING VEST REDUCER\n"; - print STDERR `date`; + print STDERR unchecked_output("date"); $cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1"; print STDERR "COMMAND:\n$cmd\n"; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - die "ERROR: reducer command returned non-zero exit code $result\n"; - } + check_bash_call($cmd); $cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1"; - my $best=`$cmd`; chomp $best; + my $best=check_bash_output("$cmd"); chomp $best; print STDERR "$best\n"; my ($oa, $x, $xscore) = split /\|/, $best; $score = $xscore; @@ -472,11 +452,11 @@ while (1){ my $v = ($ori{$k} + $axi{$k} * $x) / $norm; print W "$k $v\n"; } - `rm -rf $dir/splag.$im1`; + check_call("rm -rf $dir/splag.$im1"); $inweights = $finalFile; } $lastWeightsFile = "$dir/weights.$iteration"; - `cp $inweights $lastWeightsFile`; + check_call("cp $inweights $lastWeightsFile"); if ($icc < 2) { print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; last; @@ -520,7 +500,7 @@ sub get_lines { sub get_comma_sep_refs { my ($r,$p) = @_; - my $o = `echo $p`; + my $o = check_output("echo $p"); chomp $o; my @files = split /\s+/, $o; return "-$r " . join(" -$r ", @files); @@ -607,7 +587,7 @@ sub enseg { sub print_help { - my $executable = `basename $0`; chomp $executable; + my $executable = check_output("basename $0"); chomp $executable; print << "Help"; Usage: $executable [options] <ini file> |