diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | SConstruct | 31 | ||||
-rw-r--r-- | decoder/ff_wordset.h | 1 | ||||
-rwxr-xr-x | vest/dist-vest.pl | 12 | ||||
-rwxr-xr-x | vest/parallelize.pl | 9 |
5 files changed, 43 insertions, 11 deletions
@@ -120,3 +120,4 @@ gi/posterior-regularisation/prjava/lib/prjava-20100715.jar *.dvi *.ps *.toc +*~
\ No newline at end of file @@ -11,6 +11,9 @@ AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', meta AddOption('--efence', dest='efence', action='store_true', help='use electric fence for debugging memory corruptions') +# TODO: Troll http://www.scons.org/wiki/SconsAutoconf +# for some initial autoconf-like steps + platform = ARGUMENTS.get('OS', Platform()) include = Split('decoder utils klm mteval .') env = Environment(PREFIX=GetOption('prefix'), @@ -45,7 +48,7 @@ if glc: srcs.append(glc+'/feature-factory.cc') srcs.append(glc+'/cdec/ff_glc.cc') -for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc']: +for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc', 'vest/*.cc']: srcs.extend([ file for file in Glob(pattern) if not 'test' in str(file) and 'build_binary.cc' not in str(file) @@ -53,6 +56,30 @@ for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mtev and 'mbr_kbest.cc' not in str(file) and 'sri.cc' not in str(file) and 'fast_score.cc' not in str(file) + and 'cdec.cc' not in str(file) + and 'mr_' not in str(file) ]) -env.Program(target='decoder/cdec', source=srcs) +print 'Found {0} source files'.format(len(srcs)) +def comb(cc, srcs): + x = [cc] + x.extend(srcs) + return x + +env.Program(target='decoder/cdec', source=comb('decoder/cdec.cc', srcs)) +# TODO: The various decoder tests +# TODO: extools +env.Program(target='klm/lm/build_binary', source=comb('klm/lm/build_binary.cc', srcs)) +# TODO: klm ngram_query and tests +env.Program(target='mteval/fast_score', source=comb('mteval/fast_score.cc', srcs)) +env.Program(target='mteval/mbr_kbest', source=comb('mteval/mbr_kbest.cc', srcs)) +#env.Program(target='mteval/scorer_test', source=comb('mteval/fast_score.cc', srcs)) +# TODO: phrasinator +# TODO: Various training binaries +env.Program(target='vest/sentserver', source=['vest/sentserver.c'], LINKFLAGS='-all-static') +env.Program(target='vest/sentclient', source=['vest/sentclient.c'], LINKFLAGS='-all-static') +env.Program(target='vest/mr_vest_generate_mapper_input', source=comb('vest/mr_vest_generate_mapper_input.cc', srcs)) +env.Program(target='vest/mr_vest_map', source=comb('vest/mr_vest_map.cc', srcs)) +env.Program(target='vest/mr_vest_reduce', source=comb('vest/mr_vest_reduce.cc', srcs)) +#env.Program(target='vest/lo_test', source=comb('vest/lo_test.cc', srcs)) +# TODO: util tests diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 00e1145b..643097ef 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -32,6 +32,7 @@ class WordSet : public FeatureFunction { ~WordSet() { } + Features features() const { return single_feature(fid_); } protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 973a29ef..d17d7de1 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -194,7 +194,6 @@ sub modbin { my $src=$$_; $$_="$bindir/".basename($src); check_call("cp -p $src $$_"); - die "cp $src $$_ failed: $!" unless $? == 0; } } sub dirsize { @@ -372,13 +371,12 @@ while (1){ if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } $nmappers++; - my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; my $jobid = check_output("$qcmd"); - die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0; chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null")); + push(@cleanupcmds, "qdel $jobid 2> /dev/null"); print STDERR " $jobid"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } @@ -398,7 +396,7 @@ while (1){ print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '"))); + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; @@ -575,7 +573,11 @@ sub enseg { while (my $line=<SRC>){ chomp $line; if ($line =~ /^\s*<seg/i) { + if($line =~ /id="[0-9]+"/) { print NEWSRC "$line\n"; + } else { + die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute"; + } } else { print NEWSRC "<seg id=\"$i\">$line</seg>\n"; } diff --git a/vest/parallelize.pl b/vest/parallelize.pl index 47b77c79..b4783f91 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -82,7 +82,7 @@ sub preview_files { my @f=grep { ! ($skipempty && -z $_) } @$l; my $fn=join(' ',map {escape_shell($_)} @f); my $cmd="tail -n $n $fn"; - check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); + unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); } sub prefix_dirname($) { #like `dirname but if ends in / then return the whole thing @@ -283,7 +283,8 @@ sub numof_live_jobs { if ($use_fork) { die "not implemented"; } else { - my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat"))); + # We can probably continue decoding if the qstat error is only temporary + my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat"))); return ($#livejobs + 1); } } @@ -323,7 +324,7 @@ sub launch_job { } if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } - my $cleanfn=check_output("qdel $jobid 2> /dev/null"); + my $cleanfn="qdel $jobid 2> /dev/null"; push(@cleanup_cmds, $cleanfn); } close QOUT; @@ -346,7 +347,7 @@ sub launch_job_fork { my ($fh, $scr_name) = get_temp_script(); print $fh $script; close $fh; - my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile"; + my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile"; print STDERR "EXEC: $todo\n"; my $out = check_output("$todo"); print STDERR "RES: $out\n"; |