summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Dyer <cdyer@cs.cmu.edu> 2011-03-16 19:49:13 -0400
committer Chris Dyer <cdyer@cs.cmu.edu> 2011-03-16 19:49:13 -0400
commit 1fd0d2e5cde362fec6dc08c919185a041eeee575 (patch)
tree 055dfa285db6da0296a70fd6f6762a980f61e80b
parent 40c99c985875dd8bae17a366e63a6038ba70067f (diff)
parent cf7f57db91c7f32366583659f33837fa989c0603 (diff)
Merge branch 'master' of github.com:redpony/cdec
-rw-r--r-- .gitignore 1
-rw-r--r-- SConstruct 31
-rw-r--r-- decoder/ff_wordset.h 1
-rwxr-xr-x vest/dist-vest.pl 12
-rwxr-xr-x vest/parallelize.pl 9
5 files changed, 43 insertions, 11 deletions
diff --git a/.gitignore b/.gitignore
index 3892891c..2a287bbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,4 @@ gi/posterior-regularisation/prjava/lib/prjava-20100715.jar
*.dvi
*.ps
*.toc
+*~
\ No newline at end of file
diff --git a/SConstruct b/SConstruct
index 1a7885bc..c21d85d5 100644
--- a/SConstruct
+++ b/SConstruct
@@ -11,6 +11,9 @@ AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', meta
AddOption('--efence', dest='efence', action='store_true',
help='use electric fence for debugging memory corruptions')
+# TODO: Troll http://www.scons.org/wiki/SconsAutoconf
+# for some initial autoconf-like steps
+
platform = ARGUMENTS.get('OS', Platform())
include = Split('decoder utils klm mteval .')
env = Environment(PREFIX=GetOption('prefix'),
@@ -45,7 +48,7 @@ if glc:
srcs.append(glc+'/feature-factory.cc')
srcs.append(glc+'/cdec/ff_glc.cc')
-for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc']:
+for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc', 'vest/*.cc']:
srcs.extend([ file for file in Glob(pattern)
if not 'test' in str(file)
and 'build_binary.cc' not in str(file)
@@ -53,6 +56,30 @@ for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mtev
and 'mbr_kbest.cc' not in str(file)
and 'sri.cc' not in str(file)
and 'fast_score.cc' not in str(file)
+ and 'cdec.cc' not in str(file)
+ and 'mr_' not in str(file)
])
-env.Program(target='decoder/cdec', source=srcs)
+print 'Found {0} source files'.format(len(srcs))
+def comb(cc, srcs):
+ x = [cc]
+ x.extend(srcs)
+ return x
+
+env.Program(target='decoder/cdec', source=comb('decoder/cdec.cc', srcs))
+# TODO: The various decoder tests
+# TODO: extools
+env.Program(target='klm/lm/build_binary', source=comb('klm/lm/build_binary.cc', srcs))
+# TODO: klm ngram_query and tests
+env.Program(target='mteval/fast_score', source=comb('mteval/fast_score.cc', srcs))
+env.Program(target='mteval/mbr_kbest', source=comb('mteval/mbr_kbest.cc', srcs))
+#env.Program(target='mteval/scorer_test', source=comb('mteval/fast_score.cc', srcs))
+# TODO: phrasinator
+# TODO: Various training binaries
+env.Program(target='vest/sentserver', source=['vest/sentserver.c'], LINKFLAGS='-all-static')
+env.Program(target='vest/sentclient', source=['vest/sentclient.c'], LINKFLAGS='-all-static')
+env.Program(target='vest/mr_vest_generate_mapper_input', source=comb('vest/mr_vest_generate_mapper_input.cc', srcs))
+env.Program(target='vest/mr_vest_map', source=comb('vest/mr_vest_map.cc', srcs))
+env.Program(target='vest/mr_vest_reduce', source=comb('vest/mr_vest_reduce.cc', srcs))
+#env.Program(target='vest/lo_test', source=comb('vest/lo_test.cc', srcs))
+# TODO: util tests
diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 00e1145b..643097ef 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -32,6 +32,7 @@ class WordSet : public FeatureFunction {
~WordSet() {
}
+ Features features() const { return single_feature(fid_); }
protected:
virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 973a29ef..d17d7de1 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -194,7 +194,6 @@ sub modbin {
my $src=$$_;
$$_="$bindir/".basename($src);
check_call("cp -p $src $$_");
- die "cp $src $$_ failed: $!" unless $? == 0;
}
}
sub dirsize {
@@ -372,13 +371,12 @@ while (1){
if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
$nmappers++;
- my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
+ my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
my $jobid = check_output("$qcmd");
- die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0;
chomp $jobid;
$jobid =~ s/^(\d+)(.*?)$/\1/g;
$jobid =~ s/^Your job (\d+) .*$/\1/;
- push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null"));
+ push(@cleanupcmds, "qdel $jobid 2> /dev/null");
print STDERR " $jobid";
if ($joblist == "") { $joblist = $jobid; }
else {$joblist = $joblist . "\|" . $jobid; }
@@ -398,7 +396,7 @@ while (1){
print STDERR "Waiting for mappers to complete...\n";
while ($nmappers > 0) {
sleep 5;
- my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '")));
+ my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '")));
$nmappers = scalar @livejobs;
}
print STDERR "All mappers complete.\n";
@@ -575,7 +573,11 @@ sub enseg {
while (my $line=<SRC>){
chomp $line;
if ($line =~ /^\s*<seg/i) {
+ if($line =~ /id="[0-9]+"/) {
print NEWSRC "$line\n";
+ } else {
+ die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+ }
} else {
print NEWSRC "<seg id=\"$i\">$line</seg>\n";
}
diff --git a/vest/parallelize.pl b/vest/parallelize.pl
index 47b77c79..b4783f91 100755
--- a/vest/parallelize.pl
+++ b/vest/parallelize.pl
@@ -82,7 +82,7 @@ sub preview_files {
my @f=grep { ! ($skipempty && -z $_) } @$l;
my $fn=join(' ',map {escape_shell($_)} @f);
my $cmd="tail -n $n $fn";
- check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":"");
+ unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":"");
}
sub prefix_dirname($) {
#like `dirname but if ends in / then return the whole thing
@@ -283,7 +283,8 @@ sub numof_live_jobs {
if ($use_fork) {
die "not implemented";
} else {
- my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat")));
+ # We can probably continue decoding if the qstat error is only temporary
+ my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat")));
return ($#livejobs + 1);
}
}
@@ -323,7 +324,7 @@ sub launch_job {
}
if ($joblist == "") { $joblist = $jobid; }
else {$joblist = $joblist . "\|" . $jobid; }
- my $cleanfn=check_output("qdel $jobid 2> /dev/null");
+ my $cleanfn="qdel $jobid 2> /dev/null";
push(@cleanup_cmds, $cleanfn);
}
close QOUT;
@@ -346,7 +347,7 @@ sub launch_job_fork {
my ($fh, $scr_name) = get_temp_script();
print $fh $script;
close $fh;
- my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile";
+ my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile";
print STDERR "EXEC: $todo\n";
my $out = check_output("$todo");
print STDERR "RES: $out\n";