From 875daff93c149ca5ed63ae0cdcf20be06b72d778 Mon Sep 17 00:00:00 2001 From: redpony Date: Mon, 28 Jun 2010 00:23:35 +0000 Subject: fix mert to avoid fork problem, simplify parallelize git-svn-id: https://ws10smt.googlecode.com/svn/trunk@30 ec762483-ff6d-05da-a07a-a48fb63a330f --- vest/dist-vest.pl | 39 ++++++++++++++++++--------------------- vest/parallelize.pl | 30 +++++++++++------------------- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 22d6478a..2b4bf86b 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -34,8 +34,7 @@ my $best_weights; my $max_iterations = 15; my $optimization_iters = 6; my $num_rand_points = 20; -my $mert_nodes = join(" ", grep(/^c\d\d$/, split(/\n/, `pbsnodes -a`))); # "1 1 1 1 1" fails due to file staging conflicts -my $decode_nodes = "1 1 1 1 1 1 1 1 1 1 1 1 1 1 1"; # start 15 jobs +my $decode_nodes = 15; # number of decode nodes my $pmem = "3g"; my $disable_clean = 0; my %seen_weights; @@ -58,7 +57,7 @@ my $decoderOpt; Getopt::Long::Configure("no_auto_abbrev"); if (GetOptions( "decoder=s" => \$decoderOpt, - "decode-nodes=s" => \$decode_nodes, + "decode-nodes=i" => \$decode_nodes, "dont-clean" => \$disable_clean, "dry-run" => \$dryrun, "epsilon" => \$epsilon, @@ -67,7 +66,6 @@ if (GetOptions( "iteration=i" => \$iteration, "local" => \$run_local, "max-iterations=i" => \$max_iterations, - "mert-nodes=s" => \$mert_nodes, "normalize=s" => \$normalize, "pmem=s" => \$pmem, "ranges=s" => \$ranges, @@ -178,6 +176,7 @@ if ($dryrun){ } else { mkdir $dir; mkdir "$dir/hgs"; + mkdir "$dir/scripts"; unless (-e $initialWeights) { print STDERR "Please specify an initial weights file with --initial-weights\n"; print_help(); @@ -231,7 +230,7 @@ while (1){ my $im1 = $iteration - 1; my $weightsFile="$dir/weights.$im1"; my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile -O $dir/hgs"; - my $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -n \"$decode_nodes\" -- "; + my $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $decode_nodes -- "; if ($run_local) { $pcmd = "cat $srcFile |"; } my $cmd = $pcmd . "$decoder_cmd 2> $decoderLog 1> $runFile"; print LOGFILE "COMMAND:\n$cmd\n"; @@ -312,23 +311,22 @@ while (1){ die; } } else { - my $todo = "qsub $QSUB_FLAGS -S /bin/bash -N $client_name -o /dev/null -e $logdir/$client_name.ER"; - local(*QOUT, *QIN); - open2(\*QOUT, \*QIN, $todo); - print QIN $script; + my $script_file = "$dir/scripts/map.$shard"; + open F, ">$script_file" or die "Can't write $script_file: $!"; + print F "$script\n"; + close F; if ($first_shard) { print LOGFILE "$script\n"; $first_shard=0; } - close QIN; + $nmappers++; - while (my $jobid=){ - chomp $jobid; - $jobid =~ s/^(\d+)(.*?)$/\1/g; - $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, "`qdel $jobid 2> /dev/null`"); - print LOGFILE " $jobid"; - if ($joblist == "") { $joblist = $jobid; } - else {$joblist = $joblist . "\|" . $jobid; } - } - close QOUT; + my $jobid = `qsub $QSUB_FLAGS -S /bin/bash -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file`; + die "qsub failed: $!" unless $? == 0; + chomp $jobid; + $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; + push(@cleanupcmds, "`qdel $jobid 2> /dev/null`"); + print LOGFILE " $jobid"; + if ($joblist == "") { $joblist = $jobid; } + else {$joblist = $joblist . "\|" . $jobid; } } } if ($run_local) { @@ -484,7 +482,6 @@ sub write_config { print $fh "EVAL METRIC: $metric\n"; print $fh "START ITERATION: $iteration\n"; print $fh "MAX ITERATIONS: $max_iterations\n"; - print $fh "MERT NODES: $mert_nodes\n"; print $fh "DECODE NODES: $decode_nodes\n"; print $fh "HEAD NODE: $host\n"; print $fh "PMEM (DECODING): $pmem\n"; diff --git a/vest/parallelize.pl b/vest/parallelize.pl index f01232c8..8b3f56dd 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -26,7 +26,7 @@ my $errordir; my $multiline; my @files_to_stage; my $verbose = 1; -my $nodelist; +my $numnodes; my $user = $ENV{"USER"}; my $pmem = "2g"; @@ -40,21 +40,12 @@ if (GetOptions( "multi-line" => \$multiline, "file=s" => \@files_to_stage, "verbose" => \$verbose, - "nodelist=s" => \$nodelist, + "jobs=i" => \$numnodes, "pmem=s" => \$pmem ) == 0 || @ARGV == 0){ print_help(); } -my @nodes = grep(/^[cd]\d\d$/, split(/\n/, `pbsnodes -a`)); -if ($nodelist){ - @nodes = split(/ /, $nodelist); -} - -if ($verbose){ - print STDERR "Compute nodes: @nodes\n"; -} - my $cmd = ""; for my $arg (@ARGV){ if ($arg=~ /\s/){ @@ -64,6 +55,8 @@ for my $arg (@ARGV){ } } +die "Please specify a command to parallelize\n" if $cmd eq ''; + if ($errordir){ `mkdir -p $errordir`; } @@ -132,8 +125,8 @@ if (my $pid = fork){ print STDERR $script; print STDERR "====\n"; } - for my $node (@nodes){ - launch_job_on_node($node); + for (my $jobn=0; $jobn<$numnodes; $jobn++){ + launch_job_on_node(1); } if ($recycle_clients) { my $ret; @@ -143,9 +136,8 @@ if (my $pid = fork){ #print STDERR "waitpid $pid ret = $ret \n"; if ($ret != 0) {last; } # break $livejobs = numof_live_jobs(); - if ( $#nodes >= $livejobs ) { # a client terminated - my $numof_nodes = scalar @nodes; - print STDERR "num of requested nodes = $numof_nodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n"; + if ($numnodes >= $livejobs ) { # a client terminated + print STDERR "num of requested nodes = $numnodes; num of currently live jobs = $livejobs; Client terminated - launching another.\n"; launch_job_on_node(1); # TODO: support named nodes } else { sleep (60); @@ -197,6 +189,7 @@ sub launch_job_on_node { if ($verbose){ print STDERR "Launched client job: $jobid"; } push(@cleanup_cmds, "`qdel $jobid 2> /dev/null`"); $jobid =~ s/^(\d+)(.*?)$/\1/g; + $jobid =~ s/^Your job (\d+) .*$/\1/; print STDERR "short job id $jobid\n"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } @@ -244,9 +237,8 @@ options: -v, --verbose Print diagnostic informatoin on stderr. - -n, --nodelist - Space-delimited list of nodes to request. There is - one qsub command per node. + -j, --jobs + Number of jobs to use. -p, --pmem pmem setting for each job. -- cgit v1.2.3