summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonathan Clark <jon.h.clark@gmail.com>2011-03-24 09:48:04 -0400
committerJonathan Clark <jon.h.clark@gmail.com>2011-03-24 09:48:04 -0400
commitba4f147f84aa0d4623da640a2d0de7e6242a53af (patch)
treeded3a9ed85ef128a5cf239126da28ee3b1a5a3fc
parent70d909f695fdb8207ce251bae9e860c3787d7711 (diff)
parentcb8e10b896d4d19a3d7c9b997a74f9bd39a5c714 (diff)
Undo some silly local changes so we can pull
-rw-r--r--README.scons21
-rw-r--r--compound-split/de/charlm.rev.5gm.de.klmbin14148755 -> 17376695 bytes
-rw-r--r--decoder/Makefile.am2
-rw-r--r--decoder/ff_wordset.h6
-rw-r--r--environment/LocalConfig.pm13
-rw-r--r--klm/lm/test.arpa24
-rw-r--r--training/augment_grammar.cc9
-rwxr-xr-xvest/dist-vest.pl112
-rw-r--r--vest/libcall.pl71
-rwxr-xr-xvest/parallelize.pl33
10 files changed, 199 insertions, 92 deletions
diff --git a/README.scons b/README.scons
new file mode 100644
index 00000000..eb56a666
--- /dev/null
+++ b/README.scons
@@ -0,0 +1,21 @@
+Building cdec with the scons build system:
+
+While we don't build all components of cdec under scons (yet),
+scons makes things much simpler and reliable. Currently,
+building cdec with scons is experimental.
+
+Step-by-step:
+1) Obtain a Python V2.4 or later (you probably already have this)
+2) Download and install Scons v2.0+ from http://www.scons.org/download.php
+ $ wget http://prdownloads.sourceforge.net/scons/scons-2.0.1.tar.gz
+ $ tar -xvzf scons-2.0.1.tar.gz
+ $ cd scons-2.0.1
+ $ python setup.py build
+ $ sudo setup.py install #(if you have root)
+ $ setup.py install --home=~/prefix #(to install to a user-space prefix instead)
+
+3) Run scons. Some examples:
+ $ scons #(just build the decoder)
+ $ scons -j16 #(build using 16 cores)
+ $ scons -h #(get help)
+ $ scons --with-boost=~/prefix #(use ~/prefix as the boost directory)
diff --git a/compound-split/de/charlm.rev.5gm.de.klm b/compound-split/de/charlm.rev.5gm.de.klm
index 574898dc..e8d114bd 100644
--- a/compound-split/de/charlm.rev.5gm.de.klm
+++ b/compound-split/de/charlm.rev.5gm.de.klm
Binary files differ
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index f43e6894..9cf4c3c4 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -82,5 +82,5 @@ libcdec_a_SOURCES = \
if GLC
# Until we build GLC as a library...
- libcdec_a_SOURCES += ff_glc.cc feature-factory.cc string_util.cc
+ libcdec_a_SOURCES += ff_glc.cc
endif
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 256d54bb..00e1145b 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -13,13 +13,12 @@
class WordSet : public FeatureFunction {
public:
-
// we depend on the order of the initializer list
// to call member constructurs in the proper order
// modify this carefully!
//
// Usage: "WordSet -v vocab.txt [--oov]"
- WordSet(const std::string& param) {
+ WordSet(const std::string& param) {
std::string vocabFile;
std::string featName;
parseArgs(param, &featName, &vocabFile, &oovMode_);
@@ -30,6 +29,9 @@ class WordSet : public FeatureFunction {
loadVocab(vocabFile, &vocab_);
}
+ ~WordSet() {
+ }
+
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm
index f365319c..7b3d950c 100644
--- a/environment/LocalConfig.pm
+++ b/environment/LocalConfig.pm
@@ -12,6 +12,10 @@ my $host = domainname;
# keys are: HOST_REGEXP, MERTMem, QSubQueue, QSubMemFlag, QSubExtraFlags
my $CCONFIG = {
+ 'StarCluster' => {
+ 'HOST_REGEXP' => qr/compute-\d+\.internal$/,
+ 'QSubMemFlag' => '-l mem',
+ },
'LTICluster' => {
'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/,
'QSubMemFlag' => '-l h_vmem=',
@@ -36,9 +40,13 @@ my $CCONFIG = {
'HOST_REGEXP' => qr/^(tg-login1.blacklight.psc.teragrid.org|blacklight.psc.edu|bl1.psc.teragrid.org|bl0.psc.teragrid.org)$/,
'QSubMemFlag' => '-l pmem=',
},
+ 'Barrow/Chicago' => {
+ 'HOST_REGEXP' => qr/^(barrow|chicago).lti.cs.cmu.edu$/,
+ 'QSubMemFlag' => '-l pmem=',
+ },
'LOCAL' => {
- 'HOST_REGEXP' => qr/local\.net$/,
- 'QSubMemFlag' => '',
+ 'HOST_REGEXP' => qr/local\./,
+ 'QSubMemFlag' => ' ',
},
};
@@ -63,6 +71,7 @@ sub environment_name {
sub qsub_args {
my $mem = shift @_;
die "qsub_args requires a memory amount as a parameter, e.g. 4G" unless $mem;
+ return 'qsub -V -cwd' if environment_name() eq 'StarCluster';
my $mf = $CONFIG{'QSubMemFlag'} or die "QSubMemFlag not set for $senvironment_name";
my $cmd = "qsub -S /bin/bash ${mf}${mem}";
if ($CONFIG{'QSubQueue'}) { $cmd .= ' ' . $CONFIG{'QSubQueue'}; }
diff --git a/klm/lm/test.arpa b/klm/lm/test.arpa
index 9d674e83..ef214eae 100644
--- a/klm/lm/test.arpa
+++ b/klm/lm/test.arpa
@@ -1,17 +1,17 @@
\data\
-ngram 1=34
-ngram 2=43
-ngram 3=8
-ngram 4=5
-ngram 5=3
+ngram 1=37
+ngram 2=47
+ngram 3=11
+ngram 4=6
+ngram 5=4
\1-grams:
-1.383514 , -0.30103
-1.139057 . -0.845098
-1.029493 </s>
-99 <s> -0.4149733
--1.995635 <unk>
+-1.995635 <unk> -20
-1.285941 a -0.69897
-1.687872 also -0.30103
-1.687872 beyond -0.30103
@@ -41,6 +41,9 @@ ngram 5=3
-1.687872 watching -0.30103
-1.687872 what -0.30103
-1.687872 would -0.30103
+-3.141592 foo
+-2.718281 bar 3.0
+-6.535897 baz -0.0
\2-grams:
-0.6925742 , .
@@ -86,6 +89,10 @@ ngram 5=3
-0.2922095 watching considering
-0.2922095 what i
-0.2922095 would also
+-2 also would -6
+-15 <unk> <unk> -2
+-4 <unk> however -1
+-6 foo bar
\3-grams:
-0.01916512 more . </s>
@@ -96,6 +103,9 @@ ngram 5=3
-0.3488368 <s> looking on -0.4771212
-0.1892331 little more loin
-0.04835128 looking on a -0.4771212
+-3 also would consider -7
+-6 <unk> however <unk> -12
+-7 to look good
\4-grams:
-0.009249173 looking on a little -0.4771212
@@ -103,10 +113,12 @@ ngram 5=3
-0.005464747 screening a little more
-0.1453306 a little more loin
-0.01552657 <s> looking on a -0.4771212
+-4 also would consider higher -8
\5-grams:
-0.003061223 <s> looking on a little
-0.001813953 looking on a little more
-0.0432557 on a little more loin
+-5 also would consider higher looking
\end\
diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc
index 19120d00..9ad03b6c 100644
--- a/training/augment_grammar.cc
+++ b/training/augment_grammar.cc
@@ -36,6 +36,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
("source_lm,l",po::value<string>(),"Source language LM (KLM)")
("collapse_weights,w",po::value<string>(), "Collapse weights into a single feature X using the coefficients from this weights file")
("add_shape_types,s", "Add rule shape types")
+ ("extra_lex_feature,x", "Experimental nonlinear lexical weighting feature")
("replace_files,r", "Replace files with transformed variants (requires loading full grammar into memory)")
("grammar,g", po::value<vector<string> >(), "Input (also output) grammar file(s)");
po::options_description clo("Command line options");
@@ -85,6 +86,7 @@ template <class Model> float Score(const vector<WordID>& str, const Model &model
return total;
}
+bool extra_feature;
int kSrcLM;
vector<double> col_weights;
bool gather_rules;
@@ -94,9 +96,15 @@ static void RuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, c
static const int kSrcLM = FD::Convert("SrcLM");
static const int kPC = FD::Convert("PC");
static const int kX = FD::Convert("X");
+ static const int kPhraseModel2 = FD::Convert("PhraseModel_1");
+ static const int kNewLex = FD::Convert("NewLex");
TRulePtr r; r.reset(new TRule(*new_rule));
if (ngram) r->scores_.set_value(kSrcLM, Score(r->f_, *ngram));
r->scores_.set_value(kPC, 1.0);
+ if (extra_feature) {
+ float v = r->scores_.value(kPhraseModel2);
+ r->scores_.set_value(kNewLex, v*(v+1));
+ }
if (col_weights.size()) {
double score = r->scores_.dot(col_weights);
r->scores_.clear();
@@ -122,6 +130,7 @@ int main(int argc, char** argv) {
cerr << "Loaded " << (int)ngram->Order() << "-gram KenLM (MapSize=" << word_map.size() << ")\n";
cerr << " <s> = " << kSOS << endl;
} else { ngram = NULL; }
+ extra_feature = conf.count("extra_lex_feature") > 0;
if (conf.count("collapse_weights")) {
Weights w;
w.InitFromFile(conf["collapse_weights"].as<string>());
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 2a56dd55..973a29ef 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -1,16 +1,18 @@
#!/usr/bin/env perl
-
use strict;
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+
+# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
use Getopt::Long;
use IPC::Open2;
-use strict;
use POSIX ":sys_wait_h";
my $QSUB_CMD = qsub_args(mert_memory());
+require "libcall.pl";
+
# Default settings
my $srcFile;
my $refFiles;
@@ -22,6 +24,7 @@ my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input";
my $MAPPER = "$bin_dir/mr_vest_map";
my $REDUCER = "$bin_dir/mr_vest_reduce";
my $parallelize = "$bin_dir/parallelize.pl";
+my $libcall = "$bin_dir/libcall.pl";
my $sentserver = "$bin_dir/sentserver";
my $sentclient = "$bin_dir/sentclient";
my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
@@ -31,6 +34,7 @@ die "Can't find $MAPPER" unless -x $MAPPER;
my $cdec = "$bin_dir/../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
+die "Can't find $libcall" unless -e $libcall;
my $decoder = $cdec;
my $lines_per_mapper = 400;
my $rand_directions = 15;
@@ -124,7 +128,7 @@ sub enseg;
sub print_help;
my $nodelist;
-my $host =`hostname`; chomp $host;
+my $host =check_output("hostname"); chomp $host;
my $bleu;
my $interval_count = 0;
my $logfile;
@@ -142,7 +146,7 @@ unless ($dir){
$dir = "vest";
}
unless ($dir =~ /^\//){ # convert relative path to absolute path
- my $basedir = `pwd`;
+ my $basedir = check_output("pwd");
chomp $basedir;
$dir = "$basedir/$dir";
}
@@ -158,15 +162,18 @@ my @cleanupcmds = ();
sub cleanup {
print STDERR "Cleanup...\n";
- for my $pid (@childpids){ `kill $pid`; }
- for my $cmd (@cleanupcmds){`$cmd`; }
+ for my $pid (@childpids){ unchecked_call("kill $pid"); }
+ for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); }
exit 1;
};
+# Always call cleanup, no matter how we exit
+*CORE::GLOBAL::exit =
+ sub{ cleanup(); };
$SIG{INT} = "cleanup";
$SIG{TERM} = "cleanup";
$SIG{HUP} = "cleanup";
-my $decoderBase = `basename $decoder`; chomp $decoderBase;
+my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
my $newIniFile = "$dir/$decoderBase.ini";
my $inputFileName = "$dir/input";
my $user = $ENV{"USER"};
@@ -181,12 +188,12 @@ use File::Basename qw(basename);
sub modbin {
local $_;
my $bindir=shift;
- `mkdir -p $bindir`;
+ check_call("mkdir -p $bindir");
-d $bindir || die "couldn't make bindir $bindir";
for (@_) {
my $src=$$_;
$$_="$bindir/".basename($src);
- `cp -p $src $$_`;
+ check_call("cp -p $src $$_");
die "cp $src $$_ failed: $!" unless $? == 0;
}
}
@@ -203,7 +210,7 @@ if ($dryrun){
} else {
-e $dir || mkdir $dir;
mkdir "$dir/hgs";
- modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient) if $cpbin;
+ modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin;
mkdir "$dir/scripts";
my $cmdfile="$dir/rerun-vest.sh";
open CMD,'>',$cmdfile;
@@ -219,7 +226,7 @@ if ($dryrun){
print_help();
exit;
}
- `cp $initialWeights $dir/weights.0`;
+ check_call("cp $initialWeights $dir/weights.0");
die "Can't find weights.0" unless (-e "$dir/weights.0");
}
write_config(*STDERR);
@@ -227,7 +234,7 @@ if ($dryrun){
# Generate initial files and values
-`cp $iniFile $newIniFile`;
+check_call("cp $iniFile $newIniFile");
$iniFile = $newIniFile;
my $newsrc = "$dir/dev.input";
@@ -259,12 +266,12 @@ while (1){
my $logdir="$dir/logs.$iteration";
my $decoderLog="$logdir/decoder.sentserver.log.$iteration";
my $scorerLog="$logdir/scorer.log.$iteration";
- `mkdir -p $logdir`;
+ check_call("mkdir -p $logdir");
#decode
print STDERR "RUNNING DECODER AT ";
- print STDERR `date`;
+ print STDERR unchecked_output("date");
my $im1 = $iteration - 1;
my $weightsFile="$dir/weights.$im1";
my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
@@ -275,33 +282,28 @@ while (1){
if ($run_local) {
$pcmd = "cat $srcFile |";
} elsif ($use_make) {
- $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $decode_nodes --";
+ # TODO: Throw error when decode_nodes is specified along with use_make
+ $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --";
} else {
$pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --";
}
my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
print STDERR "COMMAND:\n$cmd\n";
- my $result = 0;
- $result = system($cmd);
- unless ($result == 0){
- cleanup();
- print STDERR "ERROR: Parallel decoder returned non-zero exit code $result\n";
- die;
- }
- my $num_hgs = `ls $dir/hgs/*.gz | wc -l`;
+ check_bash_call($cmd);
+ my $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l");
print STDERR "NUMBER OF HGs: $num_hgs\n";
die "Dev set contains $devSize sentences! Decoder failure?\n" if ($devSize != $num_hgs);
- my $dec_score = `cat $runFile | $SCORER $refs_comma_sep -l $metric`;
+ my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric");
chomp $dec_score;
print STDERR "DECODER SCORE: $dec_score\n";
# save space
- `gzip -f $runFile`;
- `gzip -f $decoderLog`;
+ check_call("gzip -f $runFile");
+ check_call("gzip -f $decoderLog");
# run optimizer
print STDERR "RUNNING OPTIMIZER AT ";
- print STDERR `date`;
+ print STDERR unchecked_output("date");
my $mergeLog="$logdir/prune-merge.log.$iteration";
my $score = 0;
@@ -309,28 +311,18 @@ while (1){
my $inweights="$dir/weights.$im1";
for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) {
print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n";
- print STDERR `date`;
+ print STDERR unchecked_output("date");
$icc++;
my $nop=$noprimary?"--no_primary":"";
my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):"";
my $bwargs=$bleu_weight!=1 ? "--bleu_weight=$bleu_weight":"";
$cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter";
print STDERR "COMMAND:\n$cmd\n";
- $result = system($cmd);
- unless ($result == 0){
- cleanup();
- die "ERROR: mapinput command returned non-zero exit code $result\n";
- }
-
- `mkdir -p $dir/splag.$im1`;
+ check_call($cmd);
+ check_call("mkdir -p $dir/splag.$im1");
$cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput.";
print STDERR "COMMAND:\n$cmd\n";
- $result = system($cmd);
- unless ($result == 0){
- cleanup();
- print STDERR "ERROR: split command returned non-zero exit code $result\n";
- die;
- }
+ check_call($cmd);
opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!";
my @shards = grep { /^mapinput\./ } readdir(DIR);
closedir DIR;
@@ -360,11 +352,7 @@ while (1){
my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
if ($run_local) {
print STDERR "COMMAND:\n$script\n";
- $result = system($script);
- unless ($result == 0){
- cleanup();
- die "ERROR: mapper returned non-zero exit code $result\n";
- }
+ check_bash_call($script);
} elsif ($use_make) {
my $script_file = "$dir/scripts/map.$shard";
open F, ">$script_file" or die "Can't write $script_file: $!";
@@ -384,13 +372,13 @@ while (1){
if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
$nmappers++;
- my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
- my $jobid = `$qcmd`;
+ my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
+ my $jobid = check_output("$qcmd");
die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0;
chomp $jobid;
$jobid =~ s/^(\d+)(.*?)$/\1/g;
$jobid =~ s/^Your job (\d+) .*$/\1/;
- push(@cleanupcmds, "`qdel $jobid 2> /dev/null`");
+ push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null"));
print STDERR " $jobid";
if ($joblist == "") { $joblist = $jobid; }
else {$joblist = $joblist . "\|" . $jobid; }
@@ -403,18 +391,14 @@ while (1){
close $mkfile;
my $mcmd = "make -j $use_make -f $mkfilename";
print STDERR "\nExecuting: $mcmd\n";
- $result = system($mcmd);
- unless ($result == 0){
- cleanup();
- die "ERROR: make command returned non-zero exit code $result\n";
- }
+ check_call($mcmd);
} else {
print STDERR "\nLaunched $nmappers mappers.\n";
sleep 8;
print STDERR "Waiting for mappers to complete...\n";
while ($nmappers > 0) {
sleep 5;
- my @livejobs = grep(/$joblist/, split(/\n/, `qstat | grep -v ' C '`));
+ my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '")));
$nmappers = scalar @livejobs;
}
print STDERR "All mappers complete.\n";
@@ -430,16 +414,12 @@ while (1){
}
print STDERR "Results for $tol/$til lines\n";
print STDERR "\nSORTING AND RUNNING VEST REDUCER\n";
- print STDERR `date`;
+ print STDERR unchecked_output("date");
$cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1";
print STDERR "COMMAND:\n$cmd\n";
- $result = system($cmd);
- unless ($result == 0){
- cleanup();
- die "ERROR: reducer command returned non-zero exit code $result\n";
- }
+ check_bash_call($cmd);
$cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1";
- my $best=`$cmd`; chomp $best;
+ my $best=check_bash_output("$cmd"); chomp $best;
print STDERR "$best\n";
my ($oa, $x, $xscore) = split /\|/, $best;
$score = $xscore;
@@ -472,11 +452,11 @@ while (1){
my $v = ($ori{$k} + $axi{$k} * $x) / $norm;
print W "$k $v\n";
}
- `rm -rf $dir/splag.$im1`;
+ check_call("rm -rf $dir/splag.$im1");
$inweights = $finalFile;
}
$lastWeightsFile = "$dir/weights.$iteration";
- `cp $inweights $lastWeightsFile`;
+ check_call("cp $inweights $lastWeightsFile");
if ($icc < 2) {
print STDERR "\nREACHED STOPPING CRITERION: score change too little\n";
last;
@@ -520,7 +500,7 @@ sub get_lines {
sub get_comma_sep_refs {
my ($r,$p) = @_;
- my $o = `echo $p`;
+ my $o = check_output("echo $p");
chomp $o;
my @files = split /\s+/, $o;
return "-$r " . join(" -$r ", @files);
@@ -607,7 +587,7 @@ sub enseg {
sub print_help {
- my $executable = `basename $0`; chomp $executable;
+ my $executable = check_output("basename $0"); chomp $executable;
print << "Help";
Usage: $executable [options] <ini file>
diff --git a/vest/libcall.pl b/vest/libcall.pl
new file mode 100644
index 00000000..c7d0f128
--- /dev/null
+++ b/vest/libcall.pl
@@ -0,0 +1,71 @@
+use IPC::Open3;
+use Symbol qw(gensym);
+
+$DUMMY_STDERR = gensym();
+$DUMMY_STDIN = gensym();
+
+# Run the command and ignore failures
+sub unchecked_call {
+ system("@_")
+}
+
+# Run the command and return its output, if any ignoring failures
+sub unchecked_output {
+ return `@_`
+}
+
+# WARNING: Do not use this for commands that will return large amounts
+# of stdout or stderr -- they might block indefinitely
+sub check_output {
+ print STDERR "Executing and gathering output: @_\n";
+
+ my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_);
+ my $proc_output = "";
+ while( <PH> ) {
+ $proc_output .= $_;
+ }
+ waitpid($pid, 0);
+ # TODO: Grab signal that the process died from
+ my $child_exit_status = $? >> 8;
+ if($child_exit_status == 0) {
+ return $proc_output;
+ } else {
+ print STDERR "ERROR: Execution of @_ failed.\n";
+ exit(1);
+ }
+}
+
+# Based on Moses' safesystem sub
+sub check_call {
+ print STDERR "Executing: @_\n";
+ system(@_);
+ my $exitcode = $? >> 8;
+ if($exitcode == 0) {
+ return 0;
+ } elsif ($? == -1) {
+ print STDERR "ERROR: Failed to execute: @_\n $!\n";
+ exit(1);
+
+ } elsif ($? & 127) {
+ printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
+ ($? & 127), ($? & 128) ? 'with' : 'without';
+ exit(1);
+
+ } else {
+ print STDERR "Failed with exit code: $exitcode\n" if $exitcode;
+ exit($exitcode);
+ }
+}
+
+sub check_bash_call {
+ my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_");
+ check_call(@args);
+}
+
+sub check_bash_output {
+ my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_");
+ return check_output(@args);
+}
+
+# perl module weirdness...
+return 1;
diff --git a/vest/parallelize.pl b/vest/parallelize.pl
index cb5406ec..47b77c79 100755
--- a/vest/parallelize.pl
+++ b/vest/parallelize.pl
@@ -28,6 +28,12 @@ use IPC::Open2;
use strict;
use POSIX ":sys_wait_h";
+use File::Basename;
+my $myDir = dirname(__FILE__);
+print STDERR __FILE__." -> $myDir\n";
+push(@INC, $myDir);
+require "libcall.pl";
+
my $tailn=5; # +0 = concatenate all the client logs. 5 = last 5 lines
my $recycle_clients; # spawn new clients when previous ones terminate
my $stay_alive; # dont let server die when having zero clients
@@ -76,7 +82,7 @@ sub preview_files {
my @f=grep { ! ($skipempty && -z $_) } @$l;
my $fn=join(' ',map {escape_shell($_)} @f);
my $cmd="tail -n $n $fn";
- `$cmd`.($footer?"\nNONEMPTY FILES:\n$fn\n":"");
+ check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":"");
}
sub prefix_dirname($) {
#like `dirname but if ends in / then return the whole thing
@@ -105,7 +111,7 @@ sub extend_path($$;$$) {
$dir=prefix_dirname($base);
}
my @cmd=("/bin/mkdir","-p",$dir);
- system(@cmd) if $mkdir;
+ check_call(@cmd) if $mkdir;
}
return $base.$ext;
}
@@ -142,7 +148,7 @@ my $prog=shift;
if ($no_which) {
$cmd=$prog;
} else {
- $cmd=`which $prog`;
+ $cmd=check_output("which $prog");
chomp $cmd;
die "$prog not found - $cmd" unless $cmd;
}
@@ -156,7 +162,7 @@ my $cdcmd=$no_cd ? '' : ("cd ".escape_shell($abscwd)."\n");
my $executable = $cmd;
$executable =~ s/^\s*(\S+)($|\s.*)/$1/;
-$executable=`basename $executable`;
+$executable=check_output("basename $executable");
chomp $executable;
@@ -192,10 +198,10 @@ sub launch_job_on_node;
# vars
-my $mydir = `dirname $0`; chomp $mydir;
+my $mydir = check_output("dirname $0"); chomp $mydir;
my $sentserver = "$mydir/sentserver";
my $sentclient = "$mydir/sentclient";
-my $host = `hostname`;
+my $host = check_output("hostname");
chomp $host;
@@ -205,7 +211,7 @@ my $port = 50300+int(rand($randp));
my $endp=$port+$tryp;
sub listening_port_lines {
my $quiet=$verbose?'':'2>/dev/null';
- `netstat -a -n $quiet | grep LISTENING | grep -i tcp`
+ return unchecked_output("netstat -a -n $quiet | grep LISTENING | grep -i tcp");
}
my $netstat=&listening_port_lines;
@@ -270,17 +276,14 @@ $cdcmd$sentclient $host:$port:$key $cmd
# my $todo = "$sentserver -k $key $multiflag $port ";
my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag ";
if ($verbose){ print STDERR "Running: $todo\n"; }
- my $rc = system($todo);
- if ($rc){
- die "Error: sentserver returned code $rc\n";
- }
+ check_call($todo);
}
sub numof_live_jobs {
if ($use_fork) {
die "not implemented";
} else {
- my @livejobs = grep(/$joblist/, split(/\n/, `qstat`));
+ my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat")));
return ($#livejobs + 1);
}
}
@@ -320,7 +323,7 @@ sub launch_job {
}
if ($joblist == "") { $joblist = $jobid; }
else {$joblist = $joblist . "\|" . $jobid; }
- my $cleanfn="`qdel $jobid 2> /dev/null`";
+ my $cleanfn=check_output("qdel $jobid 2> /dev/null");
push(@cleanup_cmds, $cleanfn);
}
close QOUT;
@@ -345,7 +348,7 @@ sub launch_job_fork {
close $fh;
my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile";
print STDERR "EXEC: $todo\n";
- my $out = `$todo`;
+ my $out = check_output("$todo");
print STDERR "RES: $out\n";
unlink $scr_name or warn "Failed to remove $scr_name";
exit 0;
@@ -377,7 +380,7 @@ sub cleanup {
sub print_help
{
- my $name = `basename $0`; chomp $name;
+ my $name = check_output("basename $0"); chomp $name;
print << "Help";
usage: $name [options]