summaryrefslogtreecommitdiff
path: root/dpmert
diff options
context:
space:
mode:
authorChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-14 20:33:51 -0500
committerChris Dyer <cdyer@allegro.clab.cs.cmu.edu>2012-11-14 20:33:51 -0500
commit7928695272b000de7142b91e05959a8fab6b1d2a (patch)
tree59fdff666e938512a34f772f04a1a247704a246f /dpmert
parent41ec6ee5146c92cdb1c279267a5058fe42f8a644 (diff)
major mert clean up, stuff for simple system demo
Diffstat (limited to 'dpmert')
-rwxr-xr-xdpmert/decode-and-evaluate.pl246
-rwxr-xr-xdpmert/dpmert.pl237
-rwxr-xr-xdpmert/parallelize.pl6
3 files changed, 316 insertions, 173 deletions
diff --git a/dpmert/decode-and-evaluate.pl b/dpmert/decode-and-evaluate.pl
new file mode 100755
index 00000000..fe765d00
--- /dev/null
+++ b/dpmert/decode-and-evaluate.pl
@@ -0,0 +1,246 @@
+#!/usr/bin/env perl
+use strict;
+# Snapshot of the command line, taken before GetOptions() consumes @ARGV;
+# read by cmdline() and escaped_cmdline() further down.
+my @ORIG_ARGV=@ARGV;
+use Cwd qw(getcwd);
+# Resolve this script's directory at compile time and append it, plus the
+# sibling ../environment directory, to @INC so that the `use LocalConfig`
+# and `require "libcall.pl"` below can be located.
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+
+# LocalConfig is loaded unconditionally. It presumably provides the
+# host-specific helpers called below (qsub_args, mert_memory, ...) -- TODO confirm.
+use LocalConfig;
+use Getopt::Long;
+use File::Basename qw(basename);
+# NOTE(review): $QSUB_CMD is not referenced again anywhere in this script.
+my $QSUB_CMD = qsub_args(mert_memory());
+
+require "libcall.pl";
+
+# Default settings
+my $default_jobs = env_default_jobs();
+my $bin_dir = $SCRIPT_DIR;
+die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
+# Scorer binary; it is invoked later with `-m ibm_bleu` and `-m ter`.
+my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
+my $parallelize = "$bin_dir/parallelize.pl";
+my $libcall = "$bin_dir/libcall.pl";
+# NOTE(review): $sentserver, $sentclient and $LocalConfig are assigned here
+# but never read again in this script.
+my $sentserver = "$bin_dir/sentserver";
+my $sentclient = "$bin_dir/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+
+my $SCORER = $FAST_SCORE;
+my $cdec = "$bin_dir/../decoder/cdec";
+# Fail fast if any external tool this script shells out to is missing.
+die "Can't find decoder in $cdec" unless -x $cdec;
+die "Can't find $parallelize" unless -x $parallelize;
+die "Can't find $libcall" unless -e $libcall;
+my $decoder = $cdec;
+my $jobs = $default_jobs; # number of decode nodes
+my $pmem = "9g";          # passed to parallelize.pl as -p
+my $help = 0;
+my $config;               # --config: handed to the decoder as -c
+my $test_set;             # --input: file with source and references per line
+my $weights;              # --weights: handed to the decoder as --weights
+my $use_make = 1;         # 1 => parallelize.pl is run with --use-fork; cleared by --qsub
+my $useqsub;
+# NOTE(review): $cpbin is never read in this script.
+my $cpbin=1;
+# Process command-line options.
+# Any parse error, any leftover positional argument, or --help prints the
+# usage text and exits.
+if (GetOptions(
+  "jobs=i" => \$jobs,
+  "help" => \$help,
+  "qsub" => \$useqsub,
+  "input=s" => \$test_set,
+  "config=s" => \$config,
+  "weights=s" => \$weights,
+  # --pmem is documented in print_help() and $pmem is forwarded to
+  # parallelize.pl, so it has to be accepted here as well.
+  "pmem=s" => \$pmem,
+) == 0 || @ARGV!=0 || $help) {
+  print_help();
+  exit;
+}
+
+# --qsub switches parallelize.pl away from local forking; refuse it when
+# this host has no qsub configuration.
+# NOTE(review): the message names LocalEnvironment.pm while the module loaded
+# by this script is LocalConfig.pm -- confirm which name is current.
+if ($useqsub) {
+  $use_make = 0;
+  die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
+}
+
+# Collect every missing required option so the user sees all of them at once.
+my @missing_args = ();
+
+push @missing_args, "--input" unless defined $test_set;
+push @missing_args, "--config" unless defined $config;
+push @missing_args, "--weights" unless defined $weights;
+die "Please specify missing arguments: " . join (', ', @missing_args) . "\nUse --help for more information.\n" if (@missing_args);
+
+# Build the per-run output directory name:
+#   eval.<test set basename without .sgm/.sgml/.xml>.<YYYYMMDD-HHMMSS>
+my @tf = localtime(time);
+my $tname = basename($test_set);
+$tname =~ s/\.(sgm|sgml|xml)$//i;
+# localtime() returns years since 1900 and a 0-based month, so both need an
+# offset; without the +1 the directory name is one month behind.
+my $dir = "eval.$tname." . sprintf('%d%02d%02d-%02d%02d%02d', 1900+$tf[5], $tf[4]+1, $tf[3], $tf[2], $tf[1], $tf[0]);
+
+check_call("mkdir -p $dir");
+
+# Split the combined test set into raw decoder input and a references file,
+# then wrap every input line in <seg id="N"> markup.
+split_devset($test_set, "$dir/test.input.raw", "$dir/test.refs");
+my $refs = "-r $dir/test.refs";
+my $newsrc = "$dir/test.input";
+enseg("$dir/test.input.raw", $newsrc);
+my $src_file = $newsrc;
+# Fail early if the generated decoder input cannot be read.
+open my $probe_fh, '<', $src_file or die "Can't read $src_file: $!";
+close $probe_fh;
+
+my $test_trans="$dir/test.trans";
+my $logdir="$dir/logs";
+my $decoderLog="$logdir/decoder.sentserver.log";
+check_call("mkdir -p $logdir");
+
+#decode
+print STDERR "RUNNING DECODER AT ";
+print STDERR unchecked_output("date");
+my $decoder_cmd = "$decoder -c $config --weights $weights";
+# parallelize.pl forks locally unless --qsub cleared $use_make.
+my $fork_opt = $use_make ? " --use-fork" : "";
+my $pcmd = "cat $src_file | $parallelize --workdir $dir$fork_opt -p $pmem -e $logdir -j $jobs --";
+# Decoder stderr goes to the log, translations (stdout) to $test_trans.
+my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $test_trans";
+check_bash_call($cmd);
+print STDERR "DECODER COMPLETED AT ";
+print STDERR unchecked_output("date");
+print STDERR "\nOUTPUT: $test_trans\n\n";
+
+# Score the translations against the references with both metrics.
+my $bleu = check_output("cat $test_trans | $SCORER $refs -m ibm_bleu");
+chomp $bleu;
+print STDERR "BLEU: $bleu\n";
+my $ter = check_output("cat $test_trans | $SCORER $refs -m ter");
+chomp $ter;
+print STDERR " TER: $ter\n";
+
+# Build the report once, then write it to disk and echo it to STDERR
+# (instead of writing it and re-reading the file through `cat`).
+my $sr = <<EOT;
+### SCORE REPORT #############################################################
+ OUTPUT=$test_trans
+ SCRIPT INPUT=$test_set
+ DECODER INPUT=$src_file
+ REFERENCES=$dir/test.refs
+------------------------------------------------------------------------------
+ BLEU=$bleu
+ TER=$ter
+##############################################################################
+EOT
+open my $score_fh, '>', "$dir/test.scores" or die "Can't write $dir/test.scores: $!";
+print {$score_fh} $sr;
+# Buffered write errors only surface at close, so check it.
+close $score_fh or die "Can't write $dir/test.scores: $!";
+print STDERR "\n\n$sr\n(A copy of this report can be found in $dir/test.scores)\n\n";
+exit 0;
+
+# enseg($src, $newsrc)
+# Copies $src to $newsrc, one line at a time, making sure every line is a
+# <seg> element:
+#   - a line that already starts with <seg must carry a numeric id="N"
+#     attribute and is copied unchanged (dies otherwise);
+#   - any other line is wrapped as <seg id="I">line</seg>, where I is the
+#     zero-based line number.
+# Dies if either file cannot be opened or the output cannot be flushed.
+sub enseg {
+  my ($src, $newsrc) = @_;
+  open my $in, '<', $src or die "Can't read $src: $!";
+  open my $out, '>', $newsrc or die "Can't write $newsrc: $!";
+  my $i = 0;
+  while (my $line = <$in>) {
+    chomp $line;
+    if ($line =~ /^\s*<seg/i) {
+      die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute"
+        unless $line =~ /id="[0-9]+"/;
+      print {$out} "$line\n";
+    } else {
+      print {$out} "<seg id=\"$i\">$line</seg>\n";
+    }
+    # The counter advances for every input line, pre-tagged or not.
+    $i++;
+  }
+  close $in;
+  close $out or die "Can't write $newsrc: $!";
+}
+
+# print_help()
+# Prints the usage message to STDOUT. The text lists the options that the
+# script's GetOptions call is meant to accept; the script takes no
+# positional arguments.
+sub print_help {
+  my $executable = basename($0);
+  print <<"Help";
+
+Usage: $executable [options]
+
+  $executable --input testset.in-ref --config cdec.ini --weights weights.txt [--jobs N] [--qsub]
+
+Options:
+
+  --help
+    Print this message and exit.
+
+  --input <file>
+    The test set: one segment per line, with the source and its
+    reference translation(s) separated by |||.
+
+  --config <file>
+    A path to the cdec.ini file.
+
+  --weights <file>
+    A file specifying feature weights.
+
+Job control options:
+
+  --jobs <I>
+    Number of decoder processes to run in parallel. [default=$default_jobs]
+
+  --qsub
+    Use qsub to run jobs in parallel (qsub must be configured in
+    environment/LocalEnvironment.pm)
+
+  --pmem <N>
+    Amount of physical memory requested for parallel decoding jobs
+    (used with qsub requests only)
+
+Help
+}
+
+# convert($str)
+# Parses a "key=value;key=value" string and returns the pairs as a hash
+# (flattened list). Pairs are separated by ';', key and value by '='.
+sub convert {
+  my ($str) = @_;
+  my %dict;
+  foreach my $pair (split /;/, $str) {
+    my ($key, $value) = split /=/, $pair;
+    $dict{$key} = $value;
+  }
+  return %dict;
+}
+
+
+
+# cmdline()
+# Returns the script path followed by the original, unescaped arguments,
+# joined with single spaces.
+sub cmdline {
+  my @words = ($0, @ORIG_ARGV);
+  return join(' ', @words);
+}
+
+# escape_shell($arg)
+# Returns $arg quoted for use as a single shell word:
+#   - undef is passed through as undef;
+#   - an argument without shell-special characters is returned unchanged;
+#   - otherwise backslash, double quote, dollar, backtick and '!' are
+#     backslash-escaped and the result is wrapped in double quotes.
+#
+# The two patterns are built inside the sub on purpose: as file-scope
+# `my` variables placed after the script's `exit 0` their initialisers were
+# never executed, leaving the patterns undefined at call time. The '$' in
+# the first class is escaped so that "$!" is not interpolated as errno.
+sub escape_shell {
+  my ($arg)=@_;
+  # Callers map over argument lists, so keep returning an explicit undef.
+  return undef unless defined $arg;
+  my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}\$!()]};
+  my $shell_escape_in_quote=qr{[\\"\$`!]};
+  if ($arg =~ /$is_shell_special/) {
+    $arg =~ s/($shell_escape_in_quote)/\\$1/g;
+    return "\"$arg\"";
+  }
+  return $arg;
+}
+
+# escaped_shell_args(@args)
+# Returns a copy of @args with a trailing newline removed from each element
+# and each element passed through escape_shell(). The caller's values are
+# not modified.
+sub escaped_shell_args {
+  my @escaped;
+  for my $original (@_) {
+    my $arg = $original;
+    chomp $arg;
+    push @escaped, escape_shell($arg);
+  }
+  return @escaped;
+}
+
+# escaped_shell_args_str(@args)
+# Shell-escapes every argument and returns them as one space-separated string.
+sub escaped_shell_args_str {
+  my @escaped = escaped_shell_args(@_);
+  return join(' ', @escaped);
+}
+
+# escaped_cmdline()
+# Returns the script path, a space, and the shell-escaped original arguments.
+sub escaped_cmdline {
+  my $escaped_args = escaped_shell_args_str(@ORIG_ARGV);
+  return join(' ', $0, $escaped_args);
+}
+
+# split_devset($infile, $outsrc, $outref)
+# Splits a combined dev/test set into two parallel files. Each input line
+# has the form
+#   source ||| reference 1 [||| reference 2 ...]
+# The source is written to $outsrc and the references, re-joined with
+# ' ||| ', to the same line of $outref.
+# Dies if a file cannot be opened or written, or if a line has no reference.
+sub split_devset {
+  my ($infile, $outsrc, $outref) = @_;
+  open my $in, '<', $infile or die "Can't read $infile: $!";
+  open my $src_out, '>', $outsrc or die "Can't write $outsrc: $!";
+  open my $ref_out, '>', $outref or die "Can't write $outref: $!";
+  # A lexical line variable keeps the caller's $_ intact.
+  while (my $line = <$in>) {
+    chomp $line;
+    my ($src, @refs) = split /\s*\|\|\|\s*/, $line;
+    die "Malformed devset line: $line\n" unless @refs;
+    print {$src_out} "$src\n";
+    print {$ref_out} join(' ||| ', @refs) . "\n";
+  }
+  # Buffered write errors only surface at close, so check the output handles.
+  close $ref_out or die "Can't write $outref: $!";
+  close $src_out or die "Can't write $outsrc: $!";
+  close $in;
+}
+
diff --git a/dpmert/dpmert.pl b/dpmert/dpmert.pl
index 2e6a9728..c4f98870 100755
--- a/dpmert/dpmert.pl
+++ b/dpmert/dpmert.pl
@@ -7,15 +7,14 @@ my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR
# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
use Getopt::Long;
-use IPC::Open2;
-use POSIX ":sys_wait_h";
-my $QSUB_CMD = qsub_args(mert_memory());
-
+use File::Basename qw(basename);
require "libcall.pl";
+my $QSUB_CMD = qsub_args(mert_memory());
+
# Default settings
-my $srcFile;
-my $refFiles;
+my $srcFile; # deprecated
+my $refFiles; # deprecated
my $default_jobs = env_default_jobs();
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
@@ -37,7 +36,7 @@ die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
my $decoder = $cdec;
-my $lines_per_mapper = 400;
+my $lines_per_mapper = 200;
my $rand_directions = 15;
my $iteration = 1;
my $best_weights;
@@ -47,53 +46,35 @@ my $jobs = $default_jobs; # number of decode nodes
my $pmem = "9g";
my $disable_clean = 0;
my %seen_weights;
-my $normalize;
my $help = 0;
my $epsilon = 0.0001;
-my $interval = 5;
-my $dryrun = 0;
my $last_score = -10000000;
my $metric = "ibm_bleu";
my $dir;
my $iniFile;
my $weights;
my $initialWeights;
-my $decoderOpt;
-my $noprimary;
-my $maxsim=0;
-my $oraclen=0;
-my $oracleb=20;
my $bleu_weight=1;
my $use_make = 1; # use make to parallelize line search
my $useqsub;
my $pass_suffix = '';
-my $devset = '';
-my $cpbin=1;
+my $devset;
# Process command-line options
-Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
- "decoder=s" => \$decoderOpt,
+ "config=s" => \$iniFile,
+ "weights=s" => \$initialWeights,
+ "devset=s" => \$devset,
"jobs=i" => \$jobs,
- "dont-clean" => \$disable_clean,
"pass-suffix=s" => \$pass_suffix,
- "dry-run" => \$dryrun,
- "epsilon=s" => \$epsilon,
"help" => \$help,
- "interval" => \$interval,
"qsub" => \$useqsub,
- "max-iterations=i" => \$max_iterations,
- "normalize=s" => \$normalize,
+ "iterations=i" => \$max_iterations,
"pmem=s" => \$pmem,
- "cpbin!" => \$cpbin,
"random-directions=i" => \$rand_directions,
- "devset=s" => \$devset,
- "ref-files=s" => \$refFiles,
"metric=s" => \$metric,
"source-file=s" => \$srcFile,
- "weights=s" => \$initialWeights,
- "workdir=s" => \$dir,
- "opt-iterations=i" => \$optimization_iters,
-) == 0 || @ARGV!=1 || $help) {
+ "output-dir=s" => \$dir,
+) == 0 || @ARGV!=0 || $help) {
print_help();
exit;
}
@@ -114,22 +95,17 @@ if (defined $srcFile || defined $refFiles) {
EOT
}
+if (!defined $iniFile) { push @missing_args, "--config"; }
if (!defined $devset) { push @missing_args, "--devset"; }
if (!defined $initialWeights) { push @missing_args, "--weights"; }
-die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
+die "Please specify missing arguments: " . join (', ', @missing_args) . "\nUse --help for more information.\n" if (@missing_args);
if ($metric =~ /^(combi|ter)$/i) {
$lines_per_mapper = 40;
} elsif ($metric =~ /^meteor$/i) {
- $lines_per_mapper = 2000; # start up time is really high
+ $lines_per_mapper = 2000; # start up time is really high for METEOR
}
-($iniFile) = @ARGV;
-
-
-sub write_config;
-sub enseg;
-sub print_help;
my $nodelist;
my $host =check_output("hostname"); chomp $host;
@@ -153,8 +129,6 @@ unless ($dir =~ /^\//){ # convert relative path to absolute path
$dir = "$basedir/$dir";
}
-if ($decoderOpt){ $decoder = $decoderOpt; }
-
# Initializations and helper functions
srand;
@@ -169,73 +143,47 @@ sub cleanup {
exit 1;
};
# Always call cleanup, no matter how we exit
-*CORE::GLOBAL::exit =
- sub{ cleanup(); };
+*CORE::GLOBAL::exit = sub{ cleanup(); };
$SIG{INT} = "cleanup";
$SIG{TERM} = "cleanup";
$SIG{HUP} = "cleanup";
-my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
+my $decoderBase = basename($decoder); chomp $decoderBase;
my $newIniFile = "$dir/$decoderBase.ini";
my $inputFileName = "$dir/input";
my $user = $ENV{"USER"};
-
# process ini file
-e $iniFile || die "Error: could not open $iniFile for reading\n";
-open(INI, $iniFile);
-use File::Basename qw(basename);
-#pass bindir, refs to vars holding bin
-sub modbin {
- local $_;
- my $bindir=shift;
- check_call("mkdir -p $bindir");
- -d $bindir || die "couldn't make bindir $bindir";
- for (@_) {
- my $src=$$_;
- $$_="$bindir/".basename($src);
- check_call("cp -p $src $$_");
- }
-}
sub dirsize {
opendir ISEMPTY,$_[0];
return scalar(readdir(ISEMPTY))-1;
}
-if ($dryrun){
- write_config(*STDERR);
- exit 0;
+if (-e $dir) {
+ # allow preexisting logfile, binaries, but not dist-dpmert.pl outputs
+ die "ERROR: output directory $dir already exists (remove or use --output-dir dir)\n\n";
} else {
- if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-dpmert.pl outputs
- die "ERROR: working dir $dir already exists\n\n";
- } else {
- -e $dir || mkdir $dir;
- mkdir "$dir/hgs";
- modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin;
- mkdir "$dir/scripts";
- my $cmdfile="$dir/rerun-dpmert.sh";
- open CMD,'>',$cmdfile;
- print CMD "cd ",&getcwd,"\n";
-# print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted.
- my $cline=&cmdline."\n";
- print CMD $cline;
- close CMD;
- print STDERR $cline;
- chmod(0755,$cmdfile);
- unless (-e $initialWeights) {
- print STDERR "Please specify an initial weights file with --initial-weights\n";
- print_help();
- exit;
- }
- check_call("cp $initialWeights $dir/weights.0");
- die "Can't find weights.0" unless (-e "$dir/weights.0");
- }
- write_config(*STDERR);
+ mkdir "$dir" or die "Can't mkdir $dir: $!";
+ mkdir "$dir/hgs" or die;
+ mkdir "$dir/scripts" or die;
+ print STDERR <<EOT;
+ DECODER: $decoder
+ INI FILE: $iniFile
+ WORKING DIR: $dir
+ DEVSET: $devset
+ EVAL METRIC: $metric
+ MAX ITERATIONS: $max_iterations
+ PARALLEL JOBS: $jobs
+ HEAD NODE: $host
+ PMEM (DECODING): $pmem
+ INITIAL WEIGHTS: $initialWeights
+EOT
}
-
# Generate initial files and values
check_call("cp $iniFile $newIniFile");
+check_call("cp $initialWeights $dir/weights.0");
$iniFile = $newIniFile;
split_devset($devset, "$dir/dev.input.raw", "$dir/dev.refs");
@@ -280,9 +228,9 @@ while (1){
my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
my $pcmd;
if ($use_make) {
- $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
+ $pcmd = "cat $srcFile | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --";
} else {
- $pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
+ $pcmd = "cat $srcFile | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --";
}
my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
print STDERR "COMMAND:\n$cmd\n";
@@ -469,29 +417,11 @@ while (1){
print STDERR "\n==========\n";
}
-print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
-
-print STDOUT "$lastWeightsFile\n";
-
+check_call("cp $lastWeightsFile $dir/weights.final");
+print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w <this file> with the decoder)\n\n";
+print STDOUT "$dir/weights.final\n";
exit 0;
-sub normalize_weights {
- my ($rfn, $rpts, $feat) = @_;
- my @feat_names = @$rfn;
- my @pts = @$rpts;
- my $z = 1.0;
- for (my $i=0; $i < scalar @feat_names; $i++) {
- if ($feat_names[$i] eq $feat) {
- $z = $pts[$i];
- last;
- }
- }
- for (my $i=0; $i < scalar @feat_names; $i++) {
- $pts[$i] /= $z;
- }
- print STDERR " NORM WEIGHTS: @pts\n";
- return @pts;
-}
sub get_lines {
my $fn = shift @_;
@@ -523,27 +453,6 @@ sub read_weights_file {
return join ' ', @r;
}
-# subs
-sub write_config {
- my $fh = shift;
- my $cleanup = "yes";
- if ($disable_clean) {$cleanup = "no";}
-
- print $fh "\n";
- print $fh "DECODER: $decoder\n";
- print $fh "INI FILE: $iniFile\n";
- print $fh "WORKING DIR: $dir\n";
- print $fh "DEVSET: $devset\n";
- print $fh "EVAL METRIC: $metric\n";
- print $fh "START ITERATION: $iteration\n";
- print $fh "MAX ITERATIONS: $max_iterations\n";
- print $fh "PARALLEL JOBS: $jobs\n";
- print $fh "HEAD NODE: $host\n";
- print $fh "PMEM (DECODING): $pmem\n";
- print $fh "CLEANUP: $cleanup\n";
- print $fh "INITIAL WEIGHTS: $initialWeights\n";
-}
-
sub update_weights_file {
my ($neww, $rfn, $rpts) = @_;
my @feats = @$rfn;
@@ -585,22 +494,34 @@ sub enseg {
sub print_help {
- my $executable = check_output("basename $0"); chomp $executable;
- print << "Help";
+ my $executable = basename($0); chomp $executable;
+ print << "Help";
Usage: $executable [options] <ini file>
- $executable [options] <ini file>
- Runs a complete MERT optimization using the decoder configuration
- in <ini file>. Required options are --weights, --source-file, and
- --ref-files.
+ $executable [options]
+ Runs a complete MERT optimization. Required options are --weights,
+ --devset, and --config.
Options:
- --help
- Print this message and exit.
+ --config <file> [-c <file>]
+ The decoder configuration file.
+
+ --devset <file> [-d <file>]
+ The source *and* references for the development set.
+
+ --weights <file> [-w <file>]
+ A file specifying initial feature weights. The format is
+ FeatureName_1 value1
+ FeatureName_2 value2
+ **All and only the weights listed in <file> will be optimized!**
+
+ --metric <name>
+ Metric to optimize.
+ Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
- --max-iterations <M>
+ --iterations <M>
Maximum number of iterations to run. If not specified, defaults
to 10.
@@ -608,39 +529,15 @@ Options:
If the decoder is doing multi-pass decoding, the pass suffix "2",
"3", etc., is used to control what iteration of weights is set.
- --ref-files <files>
- Dev set ref files. This option takes only a single string argument.
- To use multiple files (including file globbing), this argument should
- be quoted.
-
- --metric <method>
- Metric to optimize.
- Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
-
- --normalize <feature-name>
- After each iteration, rescale all feature weights such that feature-
- name has a weight of 1.0.
-
--rand-directions <num>
MERT will attempt to optimize along all of the principle directions,
set this parameter to explore other directions. Defaults to 5.
- --source-file <file>
- Dev set source file.
+ --output-dir <dir>
+ Directory for intermediate and output files.
- --weights <file>
- A file specifying initial feature weights. The format is
- FeatureName_1 value1
- FeatureName_2 value2
- **All and only the weights listed in <file> will be optimized!**
-
- --workdir <dir>
- Directory for intermediate and output files. If not specified, the
- name is derived from the ini filename. Assuming that the ini
- filename begins with the decoder name and ends with ini, the default
- name of the working directory is inferred from the middle part of
- the filename. E.g. an ini file named decoder.foo.ini would have
- a default working directory name foo.
+ --help
+ Print this message and exit.
Job control options:
diff --git a/dpmert/parallelize.pl b/dpmert/parallelize.pl
index 7d0365cc..d2ebaeea 100755
--- a/dpmert/parallelize.pl
+++ b/dpmert/parallelize.pl
@@ -40,7 +40,7 @@ my $stay_alive; # dont let server die when having zero clients
my $joblist = "";
my $errordir="";
my $multiline;
-my @files_to_stage;
+my $workdir = '.';
my $numnodes = 8;
my $user = $ENV{"USER"};
my $pmem = "9g";
@@ -128,7 +128,7 @@ unless (GetOptions(
"recycle-clients" => \$recycle_clients,
"error-dir=s" => \$errordir,
"multi-line" => \$multiline,
- "file=s" => \@files_to_stage,
+ "workdir=s" => \$workdir,
"use-fork" => \$use_fork,
"verbose" => \$verbose,
"jobs=i" => \$numnodes,
@@ -363,7 +363,7 @@ sub launch_job_fork {
}
sub get_temp_script {
- my ($fh, $filename) = tempfile( "workXXXX", SUFFIX => '.sh');
+ my ($fh, $filename) = tempfile( "$workdir/workXXXX", SUFFIX => '.sh');
return ($fh, $filename);
}