summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- configure.ac 2
-rw-r--r-- decoder/Makefile.am 7
-rw-r--r-- training/Makefile.am 6
-rwxr-xr-x training/cluster-ptrain.pl 21
-rw-r--r-- vest/Makefile.am 8
5 files changed, 30 insertions, 14 deletions
diff --git a/configure.ac b/configure.ac
index 0fd43e08..efb32284 100644
--- a/configure.ac
+++ b/configure.ac
@@ -11,8 +11,8 @@ CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
AC_CHECK_HEADER(boost/math/special_functions/digamma.hpp,
[AC_DEFINE([HAVE_BOOST_DIGAMMA], [], [flag for boost::math::digamma])])
-GTEST_LIB_CHECK
AC_PROG_INSTALL
+GTEST_LIB_CHECK
AC_ARG_WITH(srilm,
[AC_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
diff --git a/decoder/Makefile.am b/decoder/Makefile.am
index 4c86ae6f..19c22bc5 100644
--- a/decoder/Makefile.am
+++ b/decoder/Makefile.am
@@ -1,4 +1,7 @@
-bin_PROGRAMS = \
+bin_PROGRAMS = cdec
+
+if HAVE_GTEST
+noinst_PROGRAMS = \
dict_test \
weights_test \
trule_test \
@@ -6,8 +9,8 @@ bin_PROGRAMS = \
ff_test \
parser_test \
grammar_test \
- cdec \
small_vector_test
+endif
cdec_SOURCES = cdec.cc forest_writer.cc maxtrans_blunsom.cc cdec_ff.cc ff_factory.cc timing_stats.cc
small_vector_test_SOURCES = small_vector_test.cc
diff --git a/training/Makefile.am b/training/Makefile.am
index 944c75f7..b44812e7 100644
--- a/training/Makefile.am
+++ b/training/Makefile.am
@@ -4,9 +4,11 @@ bin_PROGRAMS = \
grammar_convert \
atools \
plftools \
- lbfgs_test \
mr_em_train \
- collapse_weights \
+ collapse_weights
+
+noinst_PROGRAMS = \
+ lbfgs_test \
optimize_test
atools_SOURCES = atools.cc
diff --git a/training/cluster-ptrain.pl b/training/cluster-ptrain.pl
index 8944ae34..7643d4e5 100755
--- a/training/cluster-ptrain.pl
+++ b/training/cluster-ptrain.pl
@@ -1,14 +1,13 @@
#!/usr/bin/perl -w
use strict;
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path getcwd /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR; }
use Getopt::Long;
my $MAX_ITER_ATTEMPTS = 5; # number of times to retry a failed function evaluation
-my $CWD=`pwd`; chomp $CWD;
-my $BIN_DIR = $SCRIPT_DIR;
-my $OPTIMIZER = "$BIN_DIR/mr_optimize_reduce";
-my $DECODER = "$BIN_DIR/../decoder/cdec";
+my $CWD=getcwd();
+my $OPTIMIZER = "$SCRIPT_DIR/mr_optimize_reduce";
+my $DECODER = "$SCRIPT_DIR/../decoder/cdec";
my $COMBINER_CACHE_SIZE = 150;
# This is a hack to run this on a weird cluster,
# eventually, I'll provide Hadoop scripts.
@@ -30,13 +29,15 @@ my $PRIOR;
my $OALG = "lbfgs";
my $sigsq = 1;
my $means_file;
+my $RESTART_IF_NECESSARY;
GetOptions("cdec=s" => \$DECODER,
- "run_locally" => \$LOCAL,
"distributed" => \$DISTRIBUTED,
"sigma_squared=f" => \$sigsq,
+ "max_iteration=i" => \$max_iteration,
"means=s" => \$means_file,
"optimizer=s" => \$OALG,
"gaussian_prior" => \$PRIOR,
+ "restart_if_necessary" => \$RESTART_IF_NECESSARY,
"jobs=i" => \$nodes,
"pmem=s" => \$pmem
) or usage();
@@ -61,6 +62,11 @@ if ($parallel) {
unless ($parallel) { $CFLAG = "-C 500"; }
unless ($config_file =~ /^\//) { $config_file = $CWD . '/' . $config_file; }
my $clines = num_lines($training_corpus);
+my $dir = "$CWD/ptrain";
+
+if ($RESTART_IF_NECESSARY && -d $dir) {
+ $restart = 1;
+}
print STDERR <<EOT;
PTRAIN CONFIGURATION INFORMATION
@@ -83,7 +89,6 @@ my $nodelist="1";
for (my $i=1; $i<$nodes; $i++) { $nodelist .= " 1"; }
my $iter = 1;
-my $dir = "$CWD/ptrain";
if ($restart) {
die "$dir doesn't exist, but --restart specified!\n" unless -d $dir;
my $o = `ls -t $dir/weights.*`;
@@ -161,6 +166,7 @@ while ($iter < $max_iteration) {
}
print "FINAL WEIGHTS: $dir/weights.$iter\n";
+`mv $dir/weights.$iter.gz $dir/weights.final.gz`;
sub usage {
die <<EOT;
@@ -170,6 +176,7 @@ Usage: $0 [OPTIONS] cdec.ini training.corpus weights.init
Options:
--distributed Parallelize function evaluation
+ --jobs N Number of jobs to use
--cdec PATH Path to cdec binary
--optimize OPT lbfgs, rprop, sgd
--gaussian_prior add Gaussian prior
diff --git a/vest/Makefile.am b/vest/Makefile.am
index d7d08133..80ad7218 100644
--- a/vest/Makefile.am
+++ b/vest/Makefile.am
@@ -1,12 +1,16 @@
bin_PROGRAMS = \
mr_vest_map \
mr_vest_reduce \
- scorer_test \
- lo_test \
mr_vest_generate_mapper_input \
fast_score \
union_forests
+if HAVE_GTEST
+noinst_PROGRAMS = \
+ scorer_test \
+ lo_test
+endif
+
union_forests_SOURCES = union_forests.cc
union_forests_LDADD = $(top_srcdir)/decoder/libcdec.a -lz