From bcda3258ab35cba2f71e28e1c93863958f5aca8b Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 7 Nov 2011 18:09:47 -0500 Subject: updates to pro to support regularization to previous weight vectors, regualarization normalization, disable broken regularization tuning --- pro-train/dist-pro.pl | 22 +++++++++++-- pro-train/mr_pro_reduce.cc | 82 ++++++++++++++++++++++++++-------------------- 2 files changed, 66 insertions(+), 38 deletions(-) (limited to 'pro-train') diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index dbfa329a..4bc9cfe3 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -41,6 +41,7 @@ my $lines_per_mapper = 30; my $iteration = 1; my $run_local = 0; my $best_weights; +my $psi = 1; my $max_iterations = 30; my $decode_nodes = 15; # number of decode nodes my $pmem = "4g"; @@ -62,7 +63,8 @@ my $cpbin=1; # regularization strength my $tune_regularizer = 0; -my $reg = 1e-2; +my $reg = 10; +my $reg_previous = 0; # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); @@ -73,10 +75,12 @@ if (GetOptions( "use-fork" => \$usefork, "dry-run" => \$dryrun, "epsilon=s" => \$epsilon, + "interpolate-with-weights=f" => \$psi, "help" => \$help, "weights=s" => \$initial_weights, "tune-regularizer" => \$tune_regularizer, "reg=f" => \$reg, + "reg-previous=f" => \$reg_previous, "local" => \$run_local, "use-make=i" => \$use_make, "max-iterations=i" => \$max_iterations, @@ -91,6 +95,8 @@ if (GetOptions( exit; } +die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; + if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; } if ($metric =~ /^(combi|ter)$/i) { @@ -411,7 +417,7 @@ while (1){ } print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n"; print STDERR unchecked_output("date"); - $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -s $reg"; + $cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi"; if ($tune_regularizer) { $cmd .= " -T -t $dev_test_file"; } @@ -605,11 +611,21 @@ General options: Regularization options: + --interpolate-with-weights + [deprecated] At each iteration the resulting weights are + interpolated with the weights from the previous iteration, with + this factor. + --tune-regularizer Hold out one third of the tuning data and used this to tune the - regularization parameter. + regularization parameter. [this doesn't work well] --reg + l2 regularization strength + + --reg-previous + l2 penalty for moving away from the weights from the previous + iteration. Help } diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc index aff410a0..98cddba2 100644 --- a/pro-train/mr_pro_reduce.cc +++ b/pro-train/mr_pro_reduce.cc @@ -23,11 +23,12 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") - ("interpolation,p",po::value()->default_value(0.9), "Output weights are p*w + (1-p)*w_prev") + ("regularize_to_weights,y",po::value()->default_value(0.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") + ("interpolate_with_weights,p",po::value()->default_value(1.0), "Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") ("memory_buffers,m",po::value()->default_value(200), "Number of memory buffers (LBFGS)") - ("sigma_squared,s",po::value()->default_value(0.1), "Sigma squared for Gaussian prior") - ("min_reg,r",po::value()->default_value(1e-8), "When tuning (-T) regularization strength, minimum regularization strenght") - ("max_reg,R",po::value()->default_value(10.0), "When tuning (-T) regularization strength, maximum regularization strenght") + ("regularization_strength,C",po::value()->default_value(1.0), "l2 regularization strength") + ("min_reg,r",po::value()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") + ("max_reg,R",po::value()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") ("testset,t",po::value(), "Optional held-out test set") ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") ("help,h", "Help"); @@ -95,6 +96,27 @@ void GradAdd(const SparseVector& v, const double scale, vector& weights, + const vector& prev_weights, + vector* g) { + assert(weights.size() == g->size()); + double reg = 0; + for (size_t i = 0; i < weights.size(); ++i) { + const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0); + const double& w_i = weights[i]; + double& g_i = (*g)[i]; + reg += C * w_i * w_i; + g_i += 2 * C * w_i; + + const double diff_i = w_i - prev_w_i; + reg += T * diff_i * diff_i; + g_i += 2 * T * diff_i; + } + return reg; +} + double TrainingInference(const vector& x, const vector > >& corpus, vector* g = NULL) { @@ -134,8 +156,10 @@ double TrainingInference(const vector& x, // return held-out log likelihood double LearnParameters(const vector > >& training, const vector > >& testing, - const double sigsq, + const double C, + const double T, const unsigned memory_buffers, + const vector& prev_x, vector* px) { vector& x = *px; vector vg(FD::NumFeats(), 0.0); @@ -157,26 +181,12 @@ double LearnParameters(const vector > >& train } // handle regularizer -#if 1 - double norm = 0; - for (int i = 1; i < x.size(); ++i) { - const double mean_i = 0.0; - const double param = (x[i] - mean_i); - norm += param * param; - vg[i] += param / sigsq; - } - const double reg = norm / (2.0 * sigsq); -#else - double reg = 0; -#endif + double reg = ApplyRegularizationTerms(C, T, x, prev_x, &vg); cll += reg; - cerr << cll << " (REG=" << reg << ")\tPPL=" << ppl << "\t TEST_PPL=" << tppl << "\t"; + cerr << cll << " (REG=" << reg << ")\tPPL=" << ppl << "\t TEST_PPL=" << tppl << "\t" << endl; try { - vector old_x = x; - do { - opt.Optimize(cll, vg, &x); - converged = opt.HasConverged(); - } while (!converged && x == old_x); + opt.Optimize(cll, vg, &x); + converged = opt.HasConverged(); } catch (...) { cerr << "Exception caught, assuming convergence is close enough...\n"; converged = true; @@ -201,13 +211,14 @@ int main(int argc, char** argv) { } const double min_reg = conf["min_reg"].as(); const double max_reg = conf["max_reg"].as(); - double sigsq = conf["sigma_squared"].as(); // will be overridden if parameter is tuned - assert(sigsq > 0.0); + double C = conf["regularization_strength"].as(); // will be overridden if parameter is tuned + const double T = conf["regularize_to_weights"].as(); + assert(C > 0.0); assert(min_reg > 0.0); assert(max_reg > 0.0); assert(max_reg > min_reg); - const double psi = conf["interpolation"].as(); - if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; } + const double psi = conf["interpolate_with_weights"].as(); + if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; } ReadCorpus(&cin, &training); if (conf.count("testset")) { ReadFile rf(conf["testset"].as()); @@ -231,14 +242,15 @@ int main(int argc, char** argv) { vector > sp; vector smoothed; if (tune_regularizer) { - sigsq = min_reg; + C = min_reg; const double steps = 18; double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps); cerr << "SWEEP FACTOR: " << sweep_factor << endl; - while(sigsq < max_reg) { - tppl = LearnParameters(training, testing, sigsq, conf["memory_buffers"].as(), &x); - sp.push_back(make_pair(sigsq, tppl)); - sigsq *= sweep_factor; + while(C < max_reg) { + cerr << "C=" << C << "\tT=" <(), prev_x, &x); + sp.push_back(make_pair(C, tppl)); + C *= sweep_factor; } smoothed.resize(sp.size(), 0); smoothed[0] = sp[0].second; @@ -257,16 +269,16 @@ int main(int argc, char** argv) { best_i = i; } } - sigsq = sp[best_i].first; + C = sp[best_i].first; } // tune regularizer - tppl = LearnParameters(training, testing, sigsq, conf["memory_buffers"].as(), &x); + tppl = LearnParameters(training, testing, C, T, conf["memory_buffers"].as(), prev_x, &x); if (conf.count("weights")) { for (int i = 1; i < x.size(); ++i) { x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi); } } cout.precision(15); - cout << "# sigma^2=" << sigsq << "\theld out perplexity="; + cout << "# C=" << C << "\theld out perplexity="; if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; } if (sp.size()) { cout << "# Parameter sweep:\n"; -- cgit v1.2.3 From 8b13335bbd877d337320efa0aa549f97cf303ae5 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Nov 2011 20:19:09 +0000 Subject: better defaults for pro --- pro-train/dist-pro.pl | 45 +++++++++++++++++++++------------------------ pro-train/mr_pro_reduce.cc | 8 ++++---- vest/parallelize.pl | 2 +- 3 files changed, 26 insertions(+), 29 deletions(-) (limited to 'pro-train') diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index 4bc9cfe3..6332563f 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -63,8 +63,8 @@ my $cpbin=1; # regularization strength my $tune_regularizer = 0; -my $reg = 10; -my $reg_previous = 0; +my $reg = 500; +my $reg_previous = 5000; # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); @@ -547,16 +547,12 @@ sub enseg { sub print_help { my $executable = check_output("basename $0"); chomp $executable; - print << "Help"; + print << "Help"; Usage: $executable [options] $executable [options] - Runs a complete MERT optimization and test set decoding, using - the decoder configuration in ini file. Note that many of the - options have default values that are inferred automatically - based on certain conventions. For details, refer to descriptions - of the options --decoder, --weights, and --workdir. + Runs a complete PRO optimization using the ini file specified. Required: @@ -576,6 +572,10 @@ General options: --local Run the decoder and optimizer locally with a single thread. + --use-make + Use make -j to run the optimizer commands (useful on large + shared-memory machines where qsub is unavailable). + --decode-nodes Number of decoder processes to run in parallel. [default=15] @@ -584,7 +584,7 @@ General options: --max-iterations Maximum number of iterations to run. If not specified, defaults - to 10. + to 30. --metric Metric to optimize. @@ -597,10 +597,6 @@ General options: --pmem Amount of physical memory requested for parallel decoding jobs. - --use-make - Use make -j to run the optimizer commands (useful on large - shared-memory machines where qsub is unavailable). - --workdir Directory for intermediate and output files. If not specified, the name is derived from the ini filename. Assuming that the ini @@ -611,21 +607,22 @@ General options: Regularization options: - --interpolate-with-weights - [deprecated] At each iteration the resulting weights are - interpolated with the weights from the previous iteration, with - this factor. - - --tune-regularizer - Hold out one third of the tuning data and used this to tune the - regularization parameter. [this doesn't work well] - --reg - l2 regularization strength + l2 regularization strength [default=500]. The greater this value, + the closer to zero the weights will be. --reg-previous l2 penalty for moving away from the weights from the previous - iteration. + iteration. [default=5000]. The greater this value, the closer + to the previous iteration's weights the next iteration's weights + will be. + +Deprecated options: + + --interpolate-with-weights + [deprecated] At each iteration the resulting weights are + interpolated with the weights from the previous iteration, with + this factor. [default=1.0, i.e., no effect] Help } diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc index 98cddba2..6362ce47 100644 --- a/pro-train/mr_pro_reduce.cc +++ b/pro-train/mr_pro_reduce.cc @@ -23,14 +23,14 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() ("weights,w", po::value(), "Weights from previous iteration (used as initialization and interpolation") - ("regularize_to_weights,y",po::value()->default_value(0.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") - ("interpolate_with_weights,p",po::value()->default_value(1.0), "Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") - ("memory_buffers,m",po::value()->default_value(200), "Number of memory buffers (LBFGS)") - ("regularization_strength,C",po::value()->default_value(1.0), "l2 regularization strength") + ("regularization_strength,C",po::value()->default_value(500.0), "l2 regularization strength") + ("regularize_to_weights,y",po::value()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect") + ("memory_buffers,m",po::value()->default_value(100), "Number of memory buffers (LBFGS)") ("min_reg,r",po::value()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght") ("max_reg,R",po::value()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght") ("testset,t",po::value(), "Optional held-out test set") ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength") + ("interpolate_with_weights,p",po::value()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect") ("help,h", "Help"); po::options_description dcmdline_options; dcmdline_options.add(opts); diff --git a/vest/parallelize.pl b/vest/parallelize.pl index 869f430b..7d0365cc 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -396,7 +396,7 @@ usage: $name [options] options: - --fork + --use-fork Instead of using qsub, use fork. -e, --error-dir -- cgit v1.2.3 From b4fd470d2cb80b0c88d4210f7e5bb10d2aa4531d Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 11 Nov 2011 14:13:26 -0500 Subject: new improved distributed operation for PRO, MERT --- environment/LocalConfig.pm | 43 ++++++++++++++++----- pro-train/dist-pro.pl | 77 +++++++++++++++++++------------------ vest/dist-vest.pl | 96 +++++++++++++++++++--------------------------- 3 files changed, 112 insertions(+), 104 deletions(-) (limited to 'pro-train') diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 4e5e0d5f..831a3a43 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -4,7 +4,7 @@ use strict; use warnings; use base 'Exporter'; -our @EXPORT = qw( qsub_args mert_memory environment_name ); +our @EXPORT = qw( qsub_args mert_memory environment_name env_default_jobs has_qsub ); use Net::Domain qw(hostname hostfqdn hostdomain domainname); @@ -14,47 +14,62 @@ my $host = domainname; my $CCONFIG = { 'StarCluster' => { 'HOST_REGEXP' => qr/compute-\d+\.internal$/, + 'JobControl' => 'qsub', 'QSubMemFlag' => '-l mem', + 'DefaultJobs' => 20, }, 'LTICluster' => { 'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/, + 'JobControl' => 'qsub', 'QSubMemFlag' => '-l h_vmem=', 'QSubExtraFlags' => '-l walltime=0:45:00', + 'DefaultJobs' => 15, #'QSubQueue' => '-q long', }, 'UMIACS' => { 'HOST_REGEXP' => qr/^d.*\.umiacs\.umd\.edu$/, + 'JobControl' => 'qsub', 'QSubMemFlag' => '-l pmem=', 'QSubQueue' => '-q batch', 'QSubExtraFlags' => '-l walltime=144:00:00', + 'DefaultJobs' => 15, }, 'CLSP' => { 'HOST_REGEXP' => qr/\.clsp\.jhu\.edu$/, + 'JobControl' => 'qsub', 'QSubMemFlag' => '-l mem_free=', 'MERTMem' => '9G', + 'DefaultJobs' => 15, }, 'Valhalla' => { 'HOST_REGEXP' => qr/^(thor|tyr)\.inf\.ed\.ac\.uk$/, + 'JobControl' => 'fork', + 'DefaultJobs' => 8, }, 'Blacklight' => { 'HOST_REGEXP' => qr/^(tg-login1.blacklight.psc.teragrid.org|blacklight.psc.edu|bl1.psc.teragrid.org|bl0.psc.teragrid.org)$/, - 'QSubMemFlag' => '-l pmem=', + 'JobControl' => 'fork', + 'DefaultJobs' => 32, }, 'Barrow/Chicago' => { 'HOST_REGEXP' => qr/^(barrow|chicago).lti.cs.cmu.edu$/, - 'QSubMemFlag' => '-l pmem=', + 'JobControl' => 'fork', + 'DefaultJobs' => 8, }, 'OxfordDeathSnakes' => { 'HOST_REGEXP' => qr/^(taipan|tiger).cs.ox.ac.uk$/, - 'QSubMemFlag' => '-l pmem=', + 'JobControl' => 'fork', + 'DefaultJobs' => 12, }, - 'LOCAL' => { - 'HOST_REGEXP' => qr/local\./, + 'LOCAL' => { # LOCAL must be last in the list!!! + 'HOST_REGEXP' => qr//, 'QSubMemFlag' => ' ', + 'JobControl' => 'fork', + 'DefaultJobs' => 2, }, }; -our $senvironment_name; +our $senvironment_name = 'LOCAL'; for my $config_key (keys %$CCONFIG) { my $re = $CCONFIG->{$config_key}->{'HOST_REGEXP'}; die "Can't find HOST_REGEXP for $config_key" unless $re; @@ -63,15 +78,23 @@ for my $config_key (keys %$CCONFIG) { } } -die "NO ENVIRONMENT INFO FOR HOST: $host\nPLEASE EDIT LocalConfig.pm\n" unless $senvironment_name; - our %CONFIG = %{$CCONFIG->{$senvironment_name}}; -print STDERR "**Environment: $senvironment_name\n"; +print STDERR "**Environment: $senvironment_name"; +print STDERR " (has qsub)" if has_qsub(); +print STDERR "\n"; + +sub has_qsub { + return ($CONFIG{'JobControl'} eq 'qsub'); +} sub environment_name { return $senvironment_name; } +sub env_default_jobs { + return 1 * $CONFIG{'DefaultJobs'}; +} + sub qsub_args { my $mem = shift @_; die "qsub_args requires a memory amount as a parameter, e.g. 4G" unless $mem; diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl index 6332563f..5db053de 100755 --- a/pro-train/dist-pro.pl +++ b/pro-train/dist-pro.pl @@ -10,6 +10,7 @@ use Getopt::Long; use IPC::Open2; use POSIX ":sys_wait_h"; my $QSUB_CMD = qsub_args(mert_memory()); +my $default_jobs = env_default_jobs(); my $VEST_DIR="$SCRIPT_DIR/../vest"; require "$VEST_DIR/libcall.pl"; @@ -39,11 +40,11 @@ die "Can't find $libcall" unless -e $libcall; my $decoder = $cdec; my $lines_per_mapper = 30; my $iteration = 1; -my $run_local = 0; my $best_weights; my $psi = 1; -my $max_iterations = 30; -my $decode_nodes = 15; # number of decode nodes +my $default_max_iter = 30; +my $max_iterations = $default_max_iter; +my $jobs = $default_jobs; # number of decode nodes my $pmem = "4g"; my $disable_clean = 0; my %seen_weights; @@ -55,8 +56,8 @@ my $metric = "ibm_bleu"; my $dir; my $iniFile; my $weights; -my $use_make; # use make to parallelize -my $usefork; +my $use_make = 1; # use make to parallelize +my $useqsub = 0; my $initial_weights; my $pass_suffix = ''; my $cpbin=1; @@ -69,10 +70,10 @@ my $reg_previous = 5000; # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); if (GetOptions( - "decode-nodes=i" => \$decode_nodes, + "jobs=i" => \$jobs, "dont-clean" => \$disable_clean, "pass-suffix=s" => \$pass_suffix, - "use-fork" => \$usefork, + "qsub" => \$useqsub, "dry-run" => \$dryrun, "epsilon=s" => \$epsilon, "interpolate-with-weights=f" => \$psi, @@ -81,7 +82,6 @@ if (GetOptions( "tune-regularizer" => \$tune_regularizer, "reg=f" => \$reg, "reg-previous=f" => \$reg_previous, - "local" => \$run_local, "use-make=i" => \$use_make, "max-iterations=i" => \$max_iterations, "pmem=s" => \$pmem, @@ -97,7 +97,16 @@ if (GetOptions( die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer; -if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; } +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $srcFile) { push @missing_args, "--source-file"; } +if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (!defined $initial_weights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); if ($metric =~ /^(combi|ter)$/i) { $lines_per_mapper = 5; @@ -254,13 +263,10 @@ while (1){ `rm -f $dir/hgs/*.gz`; my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; my $pcmd; - if ($run_local) { - $pcmd = "cat $srcFile |"; - } elsif ($use_make) { - # TODO: Throw error when decode_nodes is specified along with use_make - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --"; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; } else { - $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --"; + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; } my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; print STDERR "COMMAND:\n$cmd\n"; @@ -333,10 +339,7 @@ while (1){ push @mapoutputs, "$dir/splag.$im1/$mapoutput"; $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput"; - if ($run_local) { - print STDERR "COMMAND:\n$script\n"; - check_bash_call($script); - } elsif ($use_make) { + if ($use_make) { my $script_file = "$dir/scripts/map.$shard"; open F, ">$script_file" or die "Can't write $script_file: $!"; print F "#!/bin/bash\n"; @@ -382,12 +385,10 @@ while (1){ } else { @dev_outs = @mapoutputs; } - if ($run_local) { - print STDERR "\nCompleted extraction of training exemplars.\n"; - } elsif ($use_make) { + if ($use_make) { print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; close $mkfile; - my $mcmd = "make -j $use_make -f $mkfilename"; + my $mcmd = "make -j $jobs -f $mkfilename"; print STDERR "\nExecuting: $mcmd\n"; check_call($mcmd); } else { @@ -498,7 +499,7 @@ sub write_config { print $fh "REFS (DEV): $refFiles\n"; print $fh "EVAL METRIC: $metric\n"; print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "DECODE NODES: $decode_nodes\n"; + print $fh "JOBS: $jobs\n"; print $fh "HEAD NODE: $host\n"; print $fh "PMEM (DECODING): $pmem\n"; print $fh "CLEANUP: $cleanup\n"; @@ -569,22 +570,12 @@ Required: General options: - --local - Run the decoder and optimizer locally with a single thread. - - --use-make - Use make -j to run the optimizer commands (useful on large - shared-memory machines where qsub is unavailable). - - --decode-nodes - Number of decoder processes to run in parallel. [default=15] - --help Print this message and exit. --max-iterations Maximum number of iterations to run. If not specified, defaults - to 30. + to $default_max_iter. --metric Metric to optimize. @@ -594,9 +585,6 @@ General options: If the decoder is doing multi-pass decoding, the pass suffix "2", "3", etc., is used to control what iteration of weights is set. - --pmem - Amount of physical memory requested for parallel decoding jobs. - --workdir Directory for intermediate and output files. If not specified, the name is derived from the ini filename. Assuming that the ini @@ -617,6 +605,19 @@ Regularization options: to the previous iteration's weights the next iteration's weights will be. +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + Deprecated options: --interpolate-with-weights diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index b7a862c4..11e791c1 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -16,6 +16,7 @@ require "libcall.pl"; # Default settings my $srcFile; my $refFiles; +my $default_jobs = env_default_jobs(); my $bin_dir = $SCRIPT_DIR; die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir; my $FAST_SCORE="$bin_dir/../mteval/fast_score"; @@ -39,11 +40,10 @@ my $decoder = $cdec; my $lines_per_mapper = 400; my $rand_directions = 15; my $iteration = 1; -my $run_local = 0; my $best_weights; my $max_iterations = 15; my $optimization_iters = 6; -my $decode_nodes = 15; # number of decode nodes +my $jobs = $default_jobs; # number of decode nodes my $pmem = "9g"; my $disable_clean = 0; my %seen_weights; @@ -64,28 +64,25 @@ my $maxsim=0; my $oraclen=0; my $oracleb=20; my $bleu_weight=1; -my $use_make; # use make to parallelize line search +my $use_make = 1; # use make to parallelize line search my $dirargs=''; my $density_prune; -my $usefork; +my $useqsub; my $pass_suffix = ''; my $cpbin=1; # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); if (GetOptions( "decoder=s" => \$decoderOpt, - "decode-nodes=i" => \$decode_nodes, + "jobs=i" => \$jobs, "density-prune=f" => \$density_prune, "dont-clean" => \$disable_clean, "pass-suffix=s" => \$pass_suffix, - "use-fork" => \$usefork, "dry-run" => \$dryrun, "epsilon=s" => \$epsilon, "help" => \$help, "interval" => \$interval, - "iteration=i" => \$iteration, - "local" => \$run_local, - "use-make=i" => \$use_make, + "qsub" => \$useqsub, "max-iterations=i" => \$max_iterations, "normalize=s" => \$normalize, "pmem=s" => \$pmem, @@ -114,7 +111,16 @@ if (defined $density_prune) { die "--density_prune n: n must be greater than 1.0\n" unless $density_prune > 1.0; } -if ($usefork) { $usefork = "--use-fork"; } else { $usefork = ''; } +if ($useqsub) { + $use_make = 0; + die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub(); +} + +my @missing_args = (); +if (!defined $srcFile) { push @missing_args, "--source-file"; } +if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (!defined $initialWeights) { push @missing_args, "--weights"; } +die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args); if ($metric =~ /^(combi|ter)$/i) { $lines_per_mapper = 40; @@ -276,17 +282,11 @@ while (1){ my $im1 = $iteration - 1; my $weightsFile="$dir/weights.$im1"; my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; - if ($density_prune) { - $decoder_cmd .= " --density_prune $density_prune"; - } my $pcmd; - if ($run_local) { - $pcmd = "cat $srcFile |"; - } elsif ($use_make) { - # TODO: Throw error when decode_nodes is specified along with use_make - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --"; + if ($use_make) { + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --"; } else { - $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --"; + $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --"; } my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; print STDERR "COMMAND:\n$cmd\n"; @@ -365,10 +365,7 @@ while (1){ push @mapoutputs, "$dir/splag.$im1/$mapoutput"; $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput"; - if ($run_local) { - print STDERR "COMMAND:\n$script\n"; - check_bash_call($script); - } elsif ($use_make) { + if ($use_make) { my $script_file = "$dir/scripts/map.$shard"; open F, ">$script_file" or die "Can't write $script_file: $!"; print F "#!/bin/bash\n"; @@ -398,12 +395,10 @@ while (1){ else {$joblist = $joblist . "\|" . $jobid; } } } - if ($run_local) { - print STDERR "\nProcessing line search complete.\n"; - } elsif ($use_make) { + if ($use_make) { print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n"; close $mkfile; - my $mcmd = "make -j $use_make -f $mkfilename"; + my $mcmd = "make -j $jobs -f $mkfilename"; print STDERR "\nExecuting: $mcmd\n"; check_call($mcmd); } else { @@ -558,7 +553,7 @@ sub write_config { print $fh "EVAL METRIC: $metric\n"; print $fh "START ITERATION: $iteration\n"; print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "DECODE NODES: $decode_nodes\n"; + print $fh "PARALLEL JOBS: $jobs\n"; print $fh "HEAD NODE: $host\n"; print $fh "PMEM (DECODING): $pmem\n"; print $fh "CLEANUP: $cleanup\n"; @@ -612,37 +607,15 @@ sub print_help { Usage: $executable [options] $executable [options] - Runs a complete MERT optimization and test set decoding, using - the decoder configuration in ini file. Note that many of the - options have default values that are inferred automatically - based on certain conventions. For details, refer to descriptions - of the options --decoder, --weights, and --workdir. + Runs a complete MERT optimization using the decoder configuration + in . Required options are --weights, --source-file, and + --ref-files. Options: - --local - Run the decoder and optimizer locally with a single thread. - - --use-make - Use make -j to run the optimizer commands (useful on large - shared-memory machines where qsub is unavailable). - - --decode-nodes - Number of decoder processes to run in parallel. [default=15] - - --decoder - Decoder binary to use. - - --density-prune - Limit the density of the hypergraph on each iteration to N times - the number of edges on the Viterbi path. - --help Print this message and exit. - --iteration - Starting iteration number. If not specified, defaults to 1. - --max-iterations Maximum number of iterations to run. If not specified, defaults to 10. @@ -651,9 +624,6 @@ Options: If the decoder is doing multi-pass decoding, the pass suffix "2", "3", etc., is used to control what iteration of weights is set. - --pmem - Amount of physical memory requested for parallel decoding jobs. - --ref-files Dev set ref files. This option takes only a single string argument. To use multiple files (including file globbing), this argument should @@ -678,6 +648,7 @@ Options: A file specifying initial feature weights. The format is FeatureName_1 value1 FeatureName_2 value2 + **All and only the weights listed in will be optimized!** --workdir Directory for intermediate and output files. If not specified, the @@ -687,6 +658,19 @@ Options: the filename. E.g. an ini file named decoder.foo.ini would have a default working directory name foo. +Job control options: + + --jobs + Number of decoder processes to run in parallel. [default=$default_jobs] + + --qsub + Use qsub to run jobs in parallel (qsub must be configured in + environment/LocalEnvironment.pm) + + --pmem + Amount of physical memory requested for parallel decoding jobs + (used with qsub requests only) + Help } -- cgit v1.2.3