From 3dfa575d202c9277060bc43a7af9351702da9f12 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 09:05:04 -0500
Subject: fix my dumb bug that killed qsub functionality

---
 vest/dist-vest.pl   | 2 --
 vest/parallelize.pl | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 973a29ef..f6f661b9 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -194,7 +194,6 @@ sub modbin {
         my $src=$$_;
         $$_="$bindir/".basename($src);
         check_call("cp -p $src $$_");
-        die "cp $src $$_ failed: $!" unless $? == 0;
     }
 }
 sub dirsize {
@@ -374,7 +373,6 @@ while (1){
 				$nmappers++;
 				my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
 				my $jobid = check_output("$qcmd");
-				die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0;
 				chomp $jobid;
 				$jobid =~ s/^(\d+)(.*?)$/\1/g;
 				$jobid =~ s/^Your job (\d+) .*$/\1/;
diff --git a/vest/parallelize.pl b/vest/parallelize.pl
index 47b77c79..2798a303 100755
--- a/vest/parallelize.pl
+++ b/vest/parallelize.pl
@@ -82,7 +82,7 @@ sub preview_files {
     my @f=grep { ! ($skipempty && -z $_) } @$l;
     my $fn=join(' ',map {escape_shell($_)} @f);
     my $cmd="tail -n $n $fn";
-    check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":"");
+    unchecked_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":"");
 }
 sub prefix_dirname($) {
     #like `dirname but if ends in / then return the whole thing
@@ -323,7 +323,7 @@ sub launch_job {
             }
       if ($joblist == "") { $joblist = $jobid; }
       else {$joblist = $joblist . "\|" . $jobid; }
-            my $cleanfn=check_output("qdel $jobid 2> /dev/null");
+      my $cleanfn="qdel $jobid 2> /dev/null";
       push(@cleanup_cmds, $cleanfn);
     }
     close QOUT;
-- 
cgit v1.2.3


From 13b15df6a00137395eae03ba3f33a987a916257b Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 10:00:53 -0500
Subject: another dumb bug involving cleanup being executed preemptively

---
 vest/dist-vest.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index f6f661b9..c27af804 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -371,12 +371,12 @@ while (1){
 				if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
 
 				$nmappers++;
-				my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
+				my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
 				my $jobid = check_output("$qcmd");
 				chomp $jobid;
 				$jobid =~ s/^(\d+)(.*?)$/\1/g;
 				$jobid =~ s/^Your job (\d+) .*$/\1/;
-		 	 	push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null"));
+		 	 	push(@cleanupcmds, "qdel $jobid 2> /dev/null");
 				print STDERR " $jobid";
 				if ($joblist == "") { $joblist = $jobid; }
 				else {$joblist = $joblist . "\|" . $jobid; }
-- 
cgit v1.2.3


From 702df29b83ca10998ea3a8f84bc2e0e6c9e86eea Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 10:01:09 -0500
Subject: ignore emacs temp files

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 3892891c..2a287bbc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -120,3 +120,4 @@ gi/posterior-regularisation/prjava/lib/prjava-20100715.jar
 *.dvi
 *.ps
 *.toc
+*~
\ No newline at end of file
-- 
cgit v1.2.3


From 25d5729b850d1dc62eaf151b5550bd83963b08e8 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 10:09:18 -0500
Subject: don't die when there are no running jobs

---
 vest/dist-vest.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index c27af804..cfddf61c 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/env perl
+#grep!/usr/bin/env perl
 use strict;
 my @ORIG_ARGV=@ARGV;
 use Cwd qw(getcwd);
@@ -396,7 +396,7 @@ while (1){
 			print STDERR "Waiting for mappers to complete...\n";
 			while ($nmappers > 0) {
 			  sleep 5;
-			  my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '")));
+			  my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | awk '{if($0 !~ \" C \"){print}}'")));
 			  $nmappers = scalar @livejobs;
 			}
 			print STDERR "All mappers complete.\n";
-- 
cgit v1.2.3


From db200aeefcfad33e789a8790961ef5c0f66d8ba3 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 10:13:20 -0500
Subject: fail: restore the shebang line accidentally changed to '#grep!'

---
 vest/dist-vest.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index cfddf61c..6a5959dc 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -1,4 +1,4 @@
-#grep!/usr/bin/env perl
+#!/usr/bin/env perl
 use strict;
 my @ORIG_ARGV=@ARGV;
 use Cwd qw(getcwd);
-- 
cgit v1.2.3


From 92ca6e23b39043ad026c07a5aab71ffc750c1db2 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 10:22:31 -0500
Subject: just use grep and don't check return code

---
 vest/dist-vest.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index 6a5959dc..f95754dc 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -396,7 +396,7 @@ while (1){
 			print STDERR "Waiting for mappers to complete...\n";
 			while ($nmappers > 0) {
 			  sleep 5;
-			  my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | awk '{if($0 !~ \" C \"){print}}'")));
+			  my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '")));
 			  $nmappers = scalar @livejobs;
 			}
 			print STDERR "All mappers complete.\n";
-- 
cgit v1.2.3


From 6b25a85dd45af5982e07577b33c64e3b577579c3 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 10:27:43 -0500
Subject: don't fail on possibly temporary qstat errors

---
 vest/parallelize.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vest/parallelize.pl b/vest/parallelize.pl
index 2798a303..c2526503 100755
--- a/vest/parallelize.pl
+++ b/vest/parallelize.pl
@@ -283,7 +283,8 @@ sub numof_live_jobs {
   if ($use_fork) {
     die "not implemented";
   } else {
-    my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat")));
+    # We can probably continue decoding if the qstat error is only temporary
+    my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat")));
     return ($#livejobs + 1);
   }
 }
-- 
cgit v1.2.3


From eda8d83cd957463d32980da7c60085a820f7eae0 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Fri, 11 Mar 2011 11:06:48 -0500
Subject: be more verbose when running each child decoder process when forking.
 also, run child scripts under bash instead of /bin/sh to avoid some non-bash errors

---
 vest/parallelize.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vest/parallelize.pl b/vest/parallelize.pl
index c2526503..b4783f91 100755
--- a/vest/parallelize.pl
+++ b/vest/parallelize.pl
@@ -347,7 +347,7 @@ sub launch_job_fork {
     my ($fh, $scr_name) = get_temp_script();
     print $fh $script;
     close $fh;
-    my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile";
+    my $todo = "/bin/bash -xeo pipefail $scr_name 1> $outfile 2> $errorfile";
     print STDERR "EXEC: $todo\n";
     my $out = check_output("$todo");
     print STDERR "RES: $out\n";
-- 
cgit v1.2.3


From dccf47501f078a354375b9f3edd481d8c8d30268 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Mon, 14 Mar 2011 17:03:51 -0400
Subject: more paranoid checking when (idiot/time-crunched) user tries to
 define his own <seg> tags during tuning

---
 vest/dist-vest.pl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl
index f95754dc..d17d7de1 100755
--- a/vest/dist-vest.pl
+++ b/vest/dist-vest.pl
@@ -573,7 +573,11 @@ sub enseg {
 	while (my $line=<SRC>){
 		chomp $line;
 		if ($line =~ /^\s*<seg/i) {
+		    if($line =~ /id="[0-9]+"/) {
 			print NEWSRC "$line\n";
+		    } else {
+			die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+		    }
 		} else {
 			print NEWSRC "<seg id=\"$i\">$line</seg>\n";
 		}
-- 
cgit v1.2.3


From 5d0f3c6aa4e78aea09952a7a65f61d3c4dce0a0e Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Mon, 14 Mar 2011 17:05:14 -0400
Subject: Fix wordset to override features() so that we can safely use multiple
 instances of it

---
 decoder/ff_wordset.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h
index 00e1145b..643097ef 100644
--- a/decoder/ff_wordset.h
+++ b/decoder/ff_wordset.h
@@ -32,6 +32,7 @@ class WordSet : public FeatureFunction {
   ~WordSet() {
   }
 
+  Features features() const { return single_feature(fid_); }
 
  protected:
   virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
-- 
cgit v1.2.3


From 237de3db6d5917707b745e3df7be42f2497e3783 Mon Sep 17 00:00:00 2001
From: Jonathan Clark <jon.h.clark@gmail.com>
Date: Mon, 14 Mar 2011 17:39:04 -0400
Subject: Get enough compiling with scons to finish off the emnlp paper

---
 SConstruct | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/SConstruct b/SConstruct
index 1a7885bc..c21d85d5 100644
--- a/SConstruct
+++ b/SConstruct
@@ -11,6 +11,9 @@ AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', meta
 AddOption('--efence', dest='efence', action='store_true',
                   help='use electric fence for debugging memory corruptions')
 
+# TODO: Troll http://www.scons.org/wiki/SconsAutoconf
+# for some initial autoconf-like steps
+
 platform = ARGUMENTS.get('OS', Platform())
 include = Split('decoder utils klm mteval .')
 env = Environment(PREFIX=GetOption('prefix'),
@@ -45,7 +48,7 @@ if glc:
    srcs.append(glc+'/feature-factory.cc')
    srcs.append(glc+'/cdec/ff_glc.cc')
 
-for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc']:
+for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc', 'vest/*.cc']:
     srcs.extend([ file for file in Glob(pattern)
     		       if not 'test' in str(file)
 		       	  and 'build_binary.cc' not in str(file)
@@ -53,6 +56,30 @@ for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mtev
 			  and 'mbr_kbest.cc' not in str(file)
 			  and 'sri.cc' not in str(file)
 			  and 'fast_score.cc' not in str(file)
+                          and 'cdec.cc' not in str(file)
+                          and 'mr_' not in str(file)
 		])
 
-env.Program(target='decoder/cdec', source=srcs)
+print 'Found {0} source files'.format(len(srcs))
+def comb(cc, srcs):
+   x = [cc]
+   x.extend(srcs)
+   return x
+
+env.Program(target='decoder/cdec', source=comb('decoder/cdec.cc', srcs))
+# TODO: The various decoder tests
+# TODO: extools
+env.Program(target='klm/lm/build_binary', source=comb('klm/lm/build_binary.cc', srcs))
+# TODO: klm ngram_query and tests
+env.Program(target='mteval/fast_score', source=comb('mteval/fast_score.cc', srcs))
+env.Program(target='mteval/mbr_kbest', source=comb('mteval/mbr_kbest.cc', srcs))
+#env.Program(target='mteval/scorer_test', source=comb('mteval/fast_score.cc', srcs))
+# TODO: phrasinator
+# TODO: Various training binaries
+env.Program(target='vest/sentserver', source=['vest/sentserver.c'], LINKFLAGS='-all-static')
+env.Program(target='vest/sentclient', source=['vest/sentclient.c'], LINKFLAGS='-all-static')
+env.Program(target='vest/mr_vest_generate_mapper_input', source=comb('vest/mr_vest_generate_mapper_input.cc', srcs))
+env.Program(target='vest/mr_vest_map', source=comb('vest/mr_vest_map.cc', srcs))
+env.Program(target='vest/mr_vest_reduce', source=comb('vest/mr_vest_reduce.cc', srcs))
+#env.Program(target='vest/lo_test', source=comb('vest/lo_test.cc', srcs))
+# TODO: util tests
-- 
cgit v1.2.3