diff options
Diffstat (limited to 'vest')
-rwxr-xr-x | vest/dist-vest.pl | 77 |
1 files changed, 37 insertions, 40 deletions
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 3139dc73..9a00f17a 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -33,7 +33,6 @@ my $run_local = 0; my $best_weights; my $max_iterations = 15; my $optimization_iters = 6; -my $num_rand_points = 20; my $decode_nodes = 15; # number of decode nodes my $pmem = "9g"; my $disable_clean = 0; @@ -120,9 +119,7 @@ if ($restart){ unless (-e $iniFile){ die "ERROR: Could not find ini file in $dir to restart\n"; } - $logfile = "$dir/mert.log"; - open(LOGFILE, ">>$logfile"); - print LOGFILE "RESTARTING STOPPED OPTIMIZATION\n\n"; + print STDERR "RESTARTING STOPPED OPTIMIZATION\n\n"; # figure out best weights so far and iteration number open(LOG, "$dir/mert.log"); @@ -187,9 +184,9 @@ if ($dryrun){ } unless($restart){ $logfile = "$dir/mert.log"; - open(LOGFILE, ">$logfile"); + open(STDERR, ">$logfile"); } - write_config(*LOGFILE); + write_config(*STDERR); } @@ -214,7 +211,7 @@ my $lastWeightsFile; my $lastPScore = 0; # main optimization loop while (1){ - print LOGFILE "\n\nITERATION $iteration\n==========\n"; + print STDERR "\n\nITERATION $iteration\n==========\n"; # iteration-specific files my $runFile="$dir/run.raw.$iteration"; @@ -225,62 +222,62 @@ while (1){ `mkdir -p $logdir`; #decode - print LOGFILE "DECODE\n"; - print LOGFILE `date`; + print STDERR "DECODE\n"; + print STDERR `date`; my $im1 = $iteration - 1; my $weightsFile="$dir/weights.$im1"; my $decoder_cmd = "$decoder -c $iniFile -w $weightsFile -O $dir/hgs"; my $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $decode_nodes -- "; if ($run_local) { $pcmd = "cat $srcFile |"; } my $cmd = $pcmd . "$decoder_cmd 2> $decoderLog 1> $runFile"; - print LOGFILE "COMMAND:\n$cmd\n"; + print STDERR "COMMAND:\n$cmd\n"; my $result = 0; $result = system($cmd); unless ($result == 0){ cleanup(); - print LOGFILE "ERROR: Parallel decoder returned non-zero exit code $result\n"; + print STDERR "ERROR: Parallel decoder returned non-zero exit code $result\n"; die; } my $dec_score = `cat $runFile | $SCORER $refs_comma_sep -l $metric`; chomp $dec_score; - print LOGFILE "DECODER SCORE: $dec_score\n"; + print STDERR "DECODER SCORE: $dec_score\n"; # save space `gzip $runFile`; `gzip $decoderLog`; if ($iteration > $max_iterations){ - print LOGFILE "\nREACHED STOPPING CRITERION: Maximum iterations\n"; + print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n"; last; } # run optimizer - print LOGFILE `date`; + print STDERR `date`; my $mergeLog="$logdir/prune-merge.log.$iteration"; my $score = 0; my $icc = 0; my $inweights="$dir/weights.$im1"; for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) { - print LOGFILE "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; - print LOGFILE `date`; + print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; + print STDERR `date`; $icc++; $cmd="$MAPINPUT -w $inweights -r $dir/hgs -s $devSize -d $rand_directions > $dir/agenda.$im1-$opt_iter"; - print LOGFILE "COMMAND:\n$cmd\n"; + print STDERR "COMMAND:\n$cmd\n"; $result = system($cmd); unless ($result == 0){ cleanup(); - print LOGFILE "ERROR: mapinput command returned non-zero exit code $result\n"; + print STDERR "ERROR: mapinput command returned non-zero exit code $result\n"; die; } `mkdir $dir/splag.$im1`; $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput."; - print LOGFILE "COMMAND:\n$cmd\n"; + print STDERR "COMMAND:\n$cmd\n"; $result = system($cmd); unless ($result == 0){ cleanup(); - print LOGFILE "ERROR: split command returned non-zero exit code $result\n"; + print STDERR "ERROR: split command returned non-zero exit code $result\n"; die; } opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; @@ -303,11 +300,11 @@ while (1){ $o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard"; my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput"; if ($run_local) { - print LOGFILE "COMMAND:\n$script\n"; + print STDERR "COMMAND:\n$script\n"; $result = system($script); unless ($result == 0){ cleanup(); - print LOGFILE "ERROR: mapper returned non-zero exit code $result\n"; + print STDERR "ERROR: mapper returned non-zero exit code $result\n"; die; } } else { @@ -315,7 +312,7 @@ while (1){ open F, ">$script_file" or die "Can't write $script_file: $!"; print F "$script\n"; close F; - if ($first_shard) { print LOGFILE "$script\n"; $first_shard=0; } + if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } $nmappers++; my $jobid = `qsub $QSUB_FLAGS -S /bin/bash -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file`; @@ -324,22 +321,22 @@ while (1){ $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; push(@cleanupcmds, "`qdel $jobid 2> /dev/null`"); - print LOGFILE " $jobid"; + print STDERR " $jobid"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } } } if ($run_local) { } else { - print LOGFILE "\nLaunched $nmappers mappers.\n"; + print STDERR "\nLaunched $nmappers mappers.\n"; sleep 10; - print LOGFILE "Waiting for mappers to complete...\n"; + print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; my @livejobs = grep(/$joblist/, split(/\n/, `qstat`)); $nmappers = scalar @livejobs; } - print LOGFILE "All mappers complete.\n"; + print STDERR "All mappers complete.\n"; } my $tol = 0; my $til = 0; @@ -350,31 +347,31 @@ while (1){ $til += $ilines; die "$mo: output lines ($olines) doesn't match input lines ($ilines)" unless $olines==$ilines; } - print LOGFILE "Results for $tol/$til lines\n"; - print LOGFILE "\nSORTING AND RUNNING VEST REDUCER\n"; - print LOGFILE `date`; + print STDERR "Results for $tol/$til lines\n"; + print STDERR "\nSORTING AND RUNNING VEST REDUCER\n"; + print STDERR `date`; $cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1"; - print LOGFILE "COMMAND:\n$cmd\n"; + print STDERR "COMMAND:\n$cmd\n"; $result = system($cmd); unless ($result == 0){ cleanup(); - print LOGFILE "ERROR: reducer command returned non-zero exit code $result\n"; + print STDERR "ERROR: reducer command returned non-zero exit code $result\n"; die; } $cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1"; my $best=`$cmd`; chomp $best; - print LOGFILE "$best\n"; + print STDERR "$best\n"; my ($oa, $x, $xscore) = split /\|/, $best; $score = $xscore; - print LOGFILE "PROJECTED SCORE: $score\n"; + print STDERR "PROJECTED SCORE: $score\n"; if (abs($x) < $epsilon) { - print LOGFILE "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n"; + print STDERR "\nOPTIMIZER: no score improvement: abs($x) < $epsilon\n"; last; } my $psd = $score - $last_score; $last_score = $score; if (abs($psd) < $epsilon) { - print LOGFILE "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n"; + print STDERR "\nOPTIMIZER: no score improvement: abs($psd) < $epsilon\n"; last; } my ($origin, $axis) = split /\s+/, $oa; @@ -401,15 +398,15 @@ while (1){ $lastWeightsFile = "$dir/weights.$iteration"; `cp $inweights $lastWeightsFile`; if ($icc < 2) { - print LOGFILE "\nREACHED STOPPING CRITERION: score change too little\n"; + print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; last; } $lastPScore = $score; $iteration++; - print LOGFILE "\n==========\n"; + print STDERR "\n==========\n"; } -print LOGFILE "\nFINAL WEIGHTS: $dir/$lastWeightsFile\n(Use -w <this file> with hiero)\n\n"; +print STDERR "\nFINAL WEIGHTS: $dir/$lastWeightsFile\n(Use -w <this file> with hiero)\n\n"; sub normalize_weights { my ($rfn, $rpts, $feat) = @_; @@ -425,7 +422,7 @@ sub normalize_weights { for (my $i=0; $i < scalar @feat_names; $i++) { $pts[$i] /= $z; } - print LOGFILE " NORM WEIGHTS: @pts\n"; + print STDERR " NORM WEIGHTS: @pts\n"; return @pts; } |