author     Chris Dyer <cdyer@cs.cmu.edu>  2011-11-11 14:13:26 -0500
committer  Chris Dyer <cdyer@cs.cmu.edu>  2011-11-11 14:13:26 -0500
commit     cb762a9c0e50e4e49b688dcc3f52498191efb20a
tree       f9dfe9e899e0262106b41b14028ec9d8a1d22e29
parent     5c019bda38cb19e09cb16c5eefa6793d920015eb
new improved distributed operation for PRO, MERT
-rw-r--r--  environment/LocalConfig.pm   43
-rwxr-xr-x  pro-train/dist-pro.pl        77
-rwxr-xr-x  vest/dist-vest.pl            96
3 files changed, 112 insertions, 104 deletions
diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm
index 4e5e0d5f..831a3a43 100644
--- a/environment/LocalConfig.pm
+++ b/environment/LocalConfig.pm
@@ -4,7 +4,7 @@ use strict;
use warnings;
use base 'Exporter';
-our @EXPORT = qw( qsub_args mert_memory environment_name );
+our @EXPORT = qw( qsub_args mert_memory environment_name env_default_jobs has_qsub );
use Net::Domain qw(hostname hostfqdn hostdomain domainname);
@@ -14,47 +14,62 @@ my $host = domainname;
my $CCONFIG = {
'StarCluster' => {
'HOST_REGEXP' => qr/compute-\d+\.internal$/,
+ 'JobControl' => 'qsub',
'QSubMemFlag' => '-l mem',
+ 'DefaultJobs' => 20,
},
'LTICluster' => {
'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/,
+ 'JobControl' => 'qsub',
'QSubMemFlag' => '-l h_vmem=',
'QSubExtraFlags' => '-l walltime=0:45:00',
+ 'DefaultJobs' => 15,
#'QSubQueue' => '-q long',
},
'UMIACS' => {
'HOST_REGEXP' => qr/^d.*\.umiacs\.umd\.edu$/,
+ 'JobControl' => 'qsub',
'QSubMemFlag' => '-l pmem=',
'QSubQueue' => '-q batch',
'QSubExtraFlags' => '-l walltime=144:00:00',
+ 'DefaultJobs' => 15,
},
'CLSP' => {
'HOST_REGEXP' => qr/\.clsp\.jhu\.edu$/,
+ 'JobControl' => 'qsub',
'QSubMemFlag' => '-l mem_free=',
'MERTMem' => '9G',
+ 'DefaultJobs' => 15,
},
'Valhalla' => {
'HOST_REGEXP' => qr/^(thor|tyr)\.inf\.ed\.ac\.uk$/,
+ 'JobControl' => 'fork',
+ 'DefaultJobs' => 8,
},
'Blacklight' => {
'HOST_REGEXP' => qr/^(tg-login1.blacklight.psc.teragrid.org|blacklight.psc.edu|bl1.psc.teragrid.org|bl0.psc.teragrid.org)$/,
- 'QSubMemFlag' => '-l pmem=',
+ 'JobControl' => 'fork',
+ 'DefaultJobs' => 32,
},
'Barrow/Chicago' => {
'HOST_REGEXP' => qr/^(barrow|chicago).lti.cs.cmu.edu$/,
- 'QSubMemFlag' => '-l pmem=',
+ 'JobControl' => 'fork',
+ 'DefaultJobs' => 8,
},
'OxfordDeathSnakes' => {
'HOST_REGEXP' => qr/^(taipan|tiger).cs.ox.ac.uk$/,
- 'QSubMemFlag' => '-l pmem=',
+ 'JobControl' => 'fork',
+ 'DefaultJobs' => 12,
},
- 'LOCAL' => {
- 'HOST_REGEXP' => qr/local\./,
+ 'LOCAL' => { # LOCAL must be last in the list!!!
+ 'HOST_REGEXP' => qr//,
'QSubMemFlag' => ' ',
+ 'JobControl' => 'fork',
+ 'DefaultJobs' => 2,
},
};
-our $senvironment_name;
+our $senvironment_name = 'LOCAL';
for my $config_key (keys %$CCONFIG) {
my $re = $CCONFIG->{$config_key}->{'HOST_REGEXP'};
die "Can't find HOST_REGEXP for $config_key" unless $re;
@@ -63,15 +78,23 @@ for my $config_key (keys %$CCONFIG) {
}
}
-die "NO ENVIRONMENT INFO FOR HOST: $host\nPLEASE EDIT LocalConfig.pm\n" unless $senvironment_name;
-
our %CONFIG = %{$CCONFIG->{$senvironment_name}};
-print STDERR "**Environment: $senvironment_name\n";
+print STDERR "**Environment: $senvironment_name";
+print STDERR " (has qsub)" if has_qsub();
+print STDERR "\n";
+
+sub has_qsub {
+ return ($CONFIG{'JobControl'} eq 'qsub');
+}
sub environment_name {
return $senvironment_name;
}
+sub env_default_jobs {
+ return 1 * $CONFIG{'DefaultJobs'};
+}
+
sub qsub_args {
my $mem = shift @_;
die "qsub_args requires a memory amount as a parameter, e.g. 4G" unless $mem;
diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl
index 6332563f..5db053de 100755
--- a/pro-train/dist-pro.pl
+++ b/pro-train/dist-pro.pl
@@ -10,6 +10,7 @@ use Getopt::Long;
use IPC::Open2;
use POSIX ":sys_wait_h";
my $QSUB_CMD = qsub_args(mert_memory());
+my $default_jobs = env_default_jobs();
my $VEST_DIR="$SCRIPT_DIR/../vest";
require "$VEST_DIR/libcall.pl";
@@ -39,11 +40,11 @@ die "Can't find $libcall" unless -e $libcall;
my $decoder = $cdec;
my $lines_per_mapper = 30;
my $iteration = 1;
-my $run_local = 0;
my $best_weights;
my $psi = 1;
-my $max_iterations = 30;
-my $decode_nodes = 15; # number of decode nodes
+my $default_max_iter = 30;
+my $max_iterations = $default_max_iter;
+my $jobs = $default_jobs; # number of parallel decoding jobs
my $pmem = "4g";
my $disable_clean = 0;
my %seen_weights;
@@ -55,8 +56,8 @@ my $metric = "ibm_bleu";
my $dir;
my $iniFile;
my $weights;
-my $use_make; # use make to parallelize
-my $usefork;
+my $use_make = 1; # use make to parallelize
+my $useqsub = 0;
my $initial_weights;
my $pass_suffix = '';
my $cpbin=1;
@@ -69,10 +70,10 @@ my $reg_previous = 5000;
# Process command-line options
Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
- "decode-nodes=i" => \$decode_nodes,
+ "jobs=i" => \$jobs,
"dont-clean" => \$disable_clean,
"pass-suffix=s" => \$pass_suffix,
- "use-fork" => \$usefork,
+ "qsub" => \$useqsub,
"dry-run" => \$dryrun,
"epsilon=s" => \$epsilon,
"interpolate-with-weights=f" => \$psi,
@@ -81,7 +82,6 @@ if (GetOptions(
"tune-regularizer" => \$tune_regularizer,
"reg=f" => \$reg,
"reg-previous=f" => \$reg_previous,
- "local" => \$run_local,
"use-make=i" => \$use_make,
"max-iterations=i" => \$max_iterations,
"pmem=s" => \$pmem,
@@ -97,7 +97,16 @@ if (GetOptions(
die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer;
-if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; }
+if ($useqsub) {
+ $use_make = 0;
+ die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
+}
+
+my @missing_args = ();
+if (!defined $srcFile) { push @missing_args, "--source-file"; }
+if (!defined $refFiles) { push @missing_args, "--ref-files"; }
+if (!defined $initial_weights) { push @missing_args, "--weights"; }
+die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
if ($metric =~ /^(combi|ter)$/i) {
$lines_per_mapper = 5;
@@ -254,13 +263,10 @@ while (1){
`rm -f $dir/hgs/*.gz`;
my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
my $pcmd;
- if ($run_local) {
- $pcmd = "cat $srcFile |";
- } elsif ($use_make) {
- # TODO: Throw error when decode_nodes is specified along with use_make
- $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --";
+ if ($use_make) {
+ $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
} else {
- $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --";
+ $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
}
my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
print STDERR "COMMAND:\n$cmd\n";
@@ -333,10 +339,7 @@ while (1){
push @mapoutputs, "$dir/splag.$im1/$mapoutput";
$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput";
- if ($run_local) {
- print STDERR "COMMAND:\n$script\n";
- check_bash_call($script);
- } elsif ($use_make) {
+ if ($use_make) {
my $script_file = "$dir/scripts/map.$shard";
open F, ">$script_file" or die "Can't write $script_file: $!";
print F "#!/bin/bash\n";
@@ -382,12 +385,10 @@ while (1){
} else {
@dev_outs = @mapoutputs;
}
- if ($run_local) {
- print STDERR "\nCompleted extraction of training exemplars.\n";
- } elsif ($use_make) {
+ if ($use_make) {
print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n";
close $mkfile;
- my $mcmd = "make -j $use_make -f $mkfilename";
+ my $mcmd = "make -j $jobs -f $mkfilename";
print STDERR "\nExecuting: $mcmd\n";
check_call($mcmd);
} else {
@@ -498,7 +499,7 @@ sub write_config {
print $fh "REFS (DEV): $refFiles\n";
print $fh "EVAL METRIC: $metric\n";
print $fh "MAX ITERATIONS: $max_iterations\n";
- print $fh "DECODE NODES: $decode_nodes\n";
+ print $fh "JOBS: $jobs\n";
print $fh "HEAD NODE: $host\n";
print $fh "PMEM (DECODING): $pmem\n";
print $fh "CLEANUP: $cleanup\n";
@@ -569,22 +570,12 @@ Required:
General options:
- --local
- Run the decoder and optimizer locally with a single thread.
-
- --use-make <I>
- Use make -j <I> to run the optimizer commands (useful on large
- shared-memory machines where qsub is unavailable).
-
- --decode-nodes <I>
- Number of decoder processes to run in parallel. [default=15]
-
--help
Print this message and exit.
--max-iterations <M>
Maximum number of iterations to run. If not specified, defaults
- to 30.
+ to $default_max_iter.
--metric <method>
Metric to optimize.
@@ -594,9 +585,6 @@ General options:
If the decoder is doing multi-pass decoding, the pass suffix "2",
"3", etc., is used to control what iteration of weights is set.
- --pmem <N>
- Amount of physical memory requested for parallel decoding jobs.
-
--workdir <dir>
Directory for intermediate and output files. If not specified, the
name is derived from the ini filename. Assuming that the ini
@@ -617,6 +605,19 @@ Regularization options:
to the previous iteration's weights the next iteration's weights
will be.
+Job control options:
+
+ --jobs <I>
+ Number of decoder processes to run in parallel. [default=$default_jobs]
+
+ --qsub
+ Use qsub to run jobs in parallel (qsub must be configured in
+ environment/LocalConfig.pm)
+
+ --pmem <N>
+ Amount of physical memory requested for parallel decoding jobs
+ (used with qsub requests only)
+
Deprecated options:
--interpolate-with-weights <F>
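
For callers of dist-pro.pl, the practical effect is that the job count now comes from a single --jobs flag rather than --decode-nodes or the --use-make count, --local and --use-fork are gone, qsub submission is opt-in via --qsub, and --source-file, --ref-files, and --weights are checked up front. A hypothetical invocation follows; all file names are placeholders, not files from this commit:

  # Hypothetical example; cdec.ini, dev.src, dev.refs*, and start.weights
  # are placeholder names. Run 20 parallel decoding jobs through qsub:
  pro-train/dist-pro.pl --jobs 20 --qsub --pmem 4g \
      --weights start.weights --source-file dev.src --ref-files 'dev.refs*' \
      cdec.ini
  # Drop --qsub to run the same 20 jobs on the local machine via make/fork.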
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index b7a862c4..11e791c1 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -16,6 +16,7 @@ require "libcall.pl";
# Default settings
my $srcFile;
my $refFiles;
+my $default_jobs = env_default_jobs();
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
my $FAST_SCORE="$bin_dir/../mteval/fast_score";
@@ -39,11 +40,10 @@ my $decoder = $cdec;
my $lines_per_mapper = 400;
my $rand_directions = 15;
my $iteration = 1;
-my $run_local = 0;
my $best_weights;
my $max_iterations = 15;
my $optimization_iters = 6;
-my $decode_nodes = 15; # number of decode nodes
+my $jobs = $default_jobs; # number of parallel decoding jobs
my $pmem = "9g";
my $disable_clean = 0;
my %seen_weights;
@@ -64,28 +64,25 @@ my $maxsim=0;
my $oraclen=0;
my $oracleb=20;
my $bleu_weight=1;
-my $use_make; # use make to parallelize line search
+my $use_make = 1; # use make to parallelize line search
my $dirargs='';
my $density_prune;
-my $usefork;
+my $useqsub;
my $pass_suffix = '';
my $cpbin=1;
# Process command-line options
Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
"decoder=s" => \$decoderOpt,
- "decode-nodes=i" => \$decode_nodes,
+ "jobs=i" => \$jobs,
"density-prune=f" => \$density_prune,
"dont-clean" => \$disable_clean,
"pass-suffix=s" => \$pass_suffix,
- "use-fork" => \$usefork,
"dry-run" => \$dryrun,
"epsilon=s" => \$epsilon,
"help" => \$help,
"interval" => \$interval,
- "iteration=i" => \$iteration,
- "local" => \$run_local,
- "use-make=i" => \$use_make,
+ "qsub" => \$useqsub,
"max-iterations=i" => \$max_iterations,
"normalize=s" => \$normalize,
"pmem=s" => \$pmem,
@@ -114,7 +111,16 @@ if (defined $density_prune) {
die "--density_prune n: n must be greater than 1.0\n" unless $density_prune > 1.0;
}
-if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; }
+if ($useqsub) {
+ $use_make = 0;
+ die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
+}
+
+my @missing_args = ();
+if (!defined $srcFile) { push @missing_args, "--source-file"; }
+if (!defined $refFiles) { push @missing_args, "--ref-files"; }
+if (!defined $initialWeights) { push @missing_args, "--weights"; }
+die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
if ($metric =~ /^(combi|ter)$/i) {
$lines_per_mapper = 40;
@@ -276,17 +282,11 @@ while (1){
my $im1 = $iteration - 1;
my $weightsFile="$dir/weights.$im1";
my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
- if ($density_prune) {
- $decoder_cmd .= " --density_prune $density_prune";
- }
my $pcmd;
- if ($run_local) {
- $pcmd = "cat $srcFile |";
- } elsif ($use_make) {
- # TODO: Throw error when decode_nodes is specified along with use_make
- $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --";
+ if ($use_make) {
+ $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
} else {
- $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --";
+ $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
}
my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
print STDERR "COMMAND:\n$cmd\n";
@@ -365,10 +365,7 @@ while (1){
push @mapoutputs, "$dir/splag.$im1/$mapoutput";
$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput";
- if ($run_local) {
- print STDERR "COMMAND:\n$script\n";
- check_bash_call($script);
- } elsif ($use_make) {
+ if ($use_make) {
my $script_file = "$dir/scripts/map.$shard";
open F, ">$script_file" or die "Can't write $script_file: $!";
print F "#!/bin/bash\n";
@@ -398,12 +395,10 @@ while (1){
else {$joblist = $joblist . "\|" . $jobid; }
}
}
- if ($run_local) {
- print STDERR "\nProcessing line search complete.\n";
- } elsif ($use_make) {
+ if ($use_make) {
print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n";
close $mkfile;
- my $mcmd = "make -j $use_make -f $mkfilename";
+ my $mcmd = "make -j $jobs -f $mkfilename";
print STDERR "\nExecuting: $mcmd\n";
check_call($mcmd);
} else {
@@ -558,7 +553,7 @@ sub write_config {
print $fh "EVAL METRIC: $metric\n";
print $fh "START ITERATION: $iteration\n";
print $fh "MAX ITERATIONS: $max_iterations\n";
- print $fh "DECODE NODES: $decode_nodes\n";
+ print $fh "PARALLEL JOBS: $jobs\n";
print $fh "HEAD NODE: $host\n";
print $fh "PMEM (DECODING): $pmem\n";
print $fh "CLEANUP: $cleanup\n";
@@ -612,37 +607,15 @@ sub print_help {
Usage: $executable [options] <ini file>
$executable [options] <ini file>
- Runs a complete MERT optimization and test set decoding, using
- the decoder configuration in ini file. Note that many of the
- options have default values that are inferred automatically
- based on certain conventions. For details, refer to descriptions
- of the options --decoder, --weights, and --workdir.
+ Runs a complete MERT optimization using the decoder configuration
+ in <ini file>. Required options are --weights, --source-file, and
+ --ref-files.
Options:
- --local
- Run the decoder and optimizer locally with a single thread.
-
- --use-make <I>
- Use make -j <I> to run the optimizer commands (useful on large
- shared-memory machines where qsub is unavailable).
-
- --decode-nodes <I>
- Number of decoder processes to run in parallel. [default=15]
-
- --decoder <decoder path>
- Decoder binary to use.
-
- --density-prune <N>
- Limit the density of the hypergraph on each iteration to N times
- the number of edges on the Viterbi path.
-
--help
Print this message and exit.
- --iteration <I>
- Starting iteration number. If not specified, defaults to 1.
-
--max-iterations <M>
Maximum number of iterations to run. If not specified, defaults
to 10.
@@ -651,9 +624,6 @@ Options:
If the decoder is doing multi-pass decoding, the pass suffix "2",
"3", etc., is used to control what iteration of weights is set.
- --pmem <N>
- Amount of physical memory requested for parallel decoding jobs.
-
--ref-files <files>
Dev set ref files. This option takes only a single string argument.
To use multiple files (including file globbing), this argument should
@@ -678,6 +648,7 @@ Options:
A file specifying initial feature weights. The format is
FeatureName_1 value1
FeatureName_2 value2
+ **All and only the weights listed in <file> will be optimized!**
--workdir <dir>
Directory for intermediate and output files. If not specified, the
@@ -687,6 +658,19 @@ Options:
the filename. E.g. an ini file named decoder.foo.ini would have
a default working directory name foo.
+Job control options:
+
+ --jobs <I>
+ Number of decoder processes to run in parallel. [default=$default_jobs]
+
+ --qsub
+ Use qsub to run jobs in parallel (qsub must be configured in
+ environment/LocalConfig.pm)
+
+ --pmem <N>
+ Amount of physical memory requested for parallel decoding jobs
+ (used with qsub requests only)
+
Help
}
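
dist-vest.pl gets the same treatment (there --use-make and --iteration are dropped from the options entirely); when neither --jobs nor --qsub is given, it now runs env_default_jobs() parallel jobs on the local machine. A hypothetical invocation, again with placeholder file names:

  # Hypothetical example; file names are placeholders. With neither --jobs
  # nor --qsub given, the job count falls back to env_default_jobs() for
  # this host (e.g. 2 under the LOCAL profile) and jobs run locally.
  vest/dist-vest.pl --weights start.weights \
      --source-file dev.src --ref-files 'dev.refs*' cdec.ini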