From 1c6ba93d7f9d46186b05c07cd5208793554c06af Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Mar 2011 14:01:39 -0500 Subject: configuration for starcluster clusters --- environment/LocalConfig.pm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 10933f36..7ea1b416 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -12,6 +12,10 @@ my $host = domainname; # keys are: HOST_REGEXP, MERTMem, QSubQueue, QSubMemFlag, QSubExtraFlags my $CCONFIG = { + 'StarCluster' => { + 'HOST_REGEXP' => qr/compute-\d+\.internal$/, + 'QSubMemFlag' => '-l mem', + }, 'LTICluster' => { 'HOST_REGEXP' => qr/^cluster\d+\.lti\.cs\.cmu\.edu$/, 'QSubMemFlag' => '-l h_vmem=', @@ -37,8 +41,8 @@ my $CCONFIG = { 'QSubMemFlag' => '-l pmem=', }, 'LOCAL' => { - 'HOST_REGEXP' => qr/local\.net$/, - 'QSubMemFlag' => '', + 'HOST_REGEXP' => qr/local\./, + 'QSubMemFlag' => ' ', }, }; -- cgit v1.2.3 From faf9e73708e745e43405531c6fc1a33bd192260c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 10 Mar 2011 16:10:34 -0500 Subject: Fix broken klm file for de compounding --- compound-split/de/charlm.rev.5gm.de.klm | Bin 14148755 -> 17376695 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/compound-split/de/charlm.rev.5gm.de.klm b/compound-split/de/charlm.rev.5gm.de.klm index 574898dc..e8d114bd 100644 Binary files a/compound-split/de/charlm.rev.5gm.de.klm and b/compound-split/de/charlm.rev.5gm.de.klm differ -- cgit v1.2.3 From f7d23a8ea67e9d7e58e9e5d08465c5d1f70d482d Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Thu, 10 Mar 2011 16:11:31 -0500 Subject: updated test arpa file --- klm/lm/test.arpa | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/klm/lm/test.arpa b/klm/lm/test.arpa index 9d674e83..ef214eae 100644 --- a/klm/lm/test.arpa +++ b/klm/lm/test.arpa @@ -1,17 +1,17 @@ \data\ -ngram 1=34 -ngram 2=43 -ngram 3=8 -ngram 4=5 -ngram 5=3 +ngram 1=37 +ngram 2=47 +ngram 3=11 +ngram 4=6 +ngram 5=4 \1-grams: -1.383514 , -0.30103 -1.139057 . -0.845098 -1.029493 -99 -0.4149733 --1.995635 +-1.995635 -20 -1.285941 a -0.69897 -1.687872 also -0.30103 -1.687872 beyond -0.30103 @@ -41,6 +41,9 @@ ngram 5=3 -1.687872 watching -0.30103 -1.687872 what -0.30103 -1.687872 would -0.30103 +-3.141592 foo +-2.718281 bar 3.0 +-6.535897 baz -0.0 \2-grams: -0.6925742 , . @@ -86,6 +89,10 @@ ngram 5=3 -0.2922095 watching considering -0.2922095 what i -0.2922095 would also +-2 also would -6 +-15 -2 +-4 however -1 +-6 foo bar \3-grams: -0.01916512 more . @@ -96,6 +103,9 @@ ngram 5=3 -0.3488368 looking on -0.4771212 -0.1892331 little more loin -0.04835128 looking on a -0.4771212 +-3 also would consider -7 +-6 however -12 +-7 to look good \4-grams: -0.009249173 looking on a little -0.4771212 @@ -103,10 +113,12 @@ ngram 5=3 -0.005464747 screening a little more -0.1453306 a little more loin -0.01552657 looking on a -0.4771212 +-4 also would consider higher -8 \5-grams: -0.003061223 looking on a little -0.001813953 looking on a little more -0.0432557 on a little more loin +-5 also would consider higher looking \end\ -- cgit v1.2.3 From 9f821dd7f08da5a146e14863b3d49ae16e6739f4 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 17:09:21 -0500 Subject: initial version of scons --- Makefile.am | 2 +- SConstruct | 54 ++++++++++++++++++++++ decoder/Makefile.am | 4 +- decoder/ff_wordset.h | 6 ++- environment/LocalConfig.pm | 4 ++ vest/dist-vest.pl | 112 +++++++++++++++++++-------------------------- vest/parallelize.pl | 33 +++++++------ 7 files changed, 129 insertions(+), 86 deletions(-) create mode 100644 SConstruct diff --git a/Makefile.am b/Makefile.am index a808c211..0e7b3885 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,5 +5,5 @@ SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training vest extools AUTOMAKE_OPTIONS = foreign ACLOCAL_AMFLAGS = -I m4 -AM_CPPFLAGS = -D_GLIBCXX_PARALLEL +AM_CPPFLAGS = -D_GLIBCXX_PARALLEL diff --git a/SConstruct b/SConstruct new file mode 100644 index 00000000..e61af175 --- /dev/null +++ b/SConstruct @@ -0,0 +1,54 @@ +AddOption('--prefix', dest='prefix', type='string', nargs=1, action='store', metavar='DIR', + help='installation prefix') +AddOption('--with-boost', dest='boost', type='string', nargs=1, action='store', metavar='DIR', + help='boost installation directory (if in a non-standard location)') +AddOption('--with-glc', dest='glc', type='string', nargs=1, action='store', metavar='DIR', + help='path to Global Lexical Coherence package (optional)') +AddOption('--efence', dest='efence', action='store_true', + help='use electric fence for debugging memory corruptions') + +platform = ARGUMENTS.get('OS', Platform()) +include = Split('decoder utils klm mteval .') +env = Environment(PREFIX=GetOption('prefix'), + PLATFORM = platform, +# BINDIR = bin, +# INCDIR = include, +# LIBDIR = lib, + CPPPATH = include, + LIBPATH = [], + LIBS = Split('boost_program_options boost_serialization boost_thread z'), + CCFLAGS=Split('-g -O3')) + +boost = GetOption('boost') +if boost: + print 'Using Boost at {0}'.format(boost) + env.Append(CPPPATH=boost+'/include', + LIBPATH=boost+'/lib') + +if GetOption('efence'): + env.Append(LIBS=Split('efence Segfault')) + +srcs = [] + +# TODO: Get rid of config.h + +glc = GetOption('glc') +if glc: + print 'Using Global Lexical Coherence package at {0}'.format(glc) + env.Append(CCFLAGS='-DHAVE_GLC', + CPPPATH=[glc, glc+'/cdec']) + srcs.append(glc+'/string_util.cc') + srcs.append(glc+'/feature-factory.cc') + srcs.append(glc+'/cdec/ff_glc.cc') + +for pattern in ['decoder/*.cc', 'decoder/*.c', 'klm/*/*.cc', 'utils/*.cc', 'mteval/*.cc']: + srcs.extend([ file for file in Glob(pattern) + if not 'test' in str(file) + and 'build_binary.cc' not in str(file) + and 'ngram_query.cc' not in str(file) + and 'mbr_kbest.cc' not in str(file) + and 'sri.cc' not in str(file) + and 'fast_score.cc' not in str(file) + ]) + +env.Program(target='decoder/cdec', source=srcs) diff --git a/decoder/Makefile.am b/decoder/Makefile.am index 9cf4c3c4..e1dba497 100644 --- a/decoder/Makefile.am +++ b/decoder/Makefile.am @@ -26,7 +26,7 @@ hg_test_SOURCES = hg_test.cc hg_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz trule_test_SOURCES = trule_test.cc trule_test_LDADD = $(GTEST_LDFLAGS) $(GTEST_LIBS) libcdec.a ../mteval/libmteval.a ../utils/libutils.a -lz -AM_CPPFLAGS = -W -Wall -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm +AM_CPPFLAGS = -W -Wno-sign-compare $(GTEST_CPPFLAGS) -I.. -I../mteval -I../utils -I../klm rule_lexer.cc: rule_lexer.l $(LEX) -s -CF -8 -o$@ $< @@ -82,5 +82,5 @@ libcdec_a_SOURCES = \ if GLC # Until we build GLC as a library... - libcdec_a_SOURCES += ff_glc.cc + libcdec_a_SOURCES += ff_glc.cc string_util.cc feature-factory.cc endif diff --git a/decoder/ff_wordset.h b/decoder/ff_wordset.h index 256d54bb..00e1145b 100644 --- a/decoder/ff_wordset.h +++ b/decoder/ff_wordset.h @@ -13,13 +13,12 @@ class WordSet : public FeatureFunction { public: - // we depend on the order of the initializer list // to call member constructurs in the proper order // modify this carefully! // // Usage: "WordSet -v vocab.txt [--oov]" - WordSet(const std::string& param) { + WordSet(const std::string& param) { std::string vocabFile; std::string featName; parseArgs(param, &featName, &vocabFile, &oovMode_); @@ -30,6 +29,9 @@ class WordSet : public FeatureFunction { loadVocab(vocabFile, &vocab_); } + ~WordSet() { + } + protected: virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 10933f36..dd3ef761 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -36,6 +36,10 @@ my $CCONFIG = { 'HOST_REGEXP' => qr/^(blacklight.psc.edu|bl1.psc.teragrid.org|bl0.psc.teragrid.org)$/, 'QSubMemFlag' => '-l pmem=', }, + 'Barrow/Chicago' => { + 'HOST_REGEXP' => qr/^(barrow|chicago).lti.cs.cmu.edu$/, + 'QSubMemFlag' => '-l pmem=', + }, 'LOCAL' => { 'HOST_REGEXP' => qr/local\.net$/, 'QSubMemFlag' => '', diff --git a/vest/dist-vest.pl b/vest/dist-vest.pl index 2a56dd55..973a29ef 100755 --- a/vest/dist-vest.pl +++ b/vest/dist-vest.pl @@ -1,16 +1,18 @@ #!/usr/bin/env perl - use strict; my @ORIG_ARGV=@ARGV; use Cwd qw(getcwd); my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } + +# Skip local config (used for distributing jobs) if we're running in local-only mode use LocalConfig; use Getopt::Long; use IPC::Open2; -use strict; use POSIX ":sys_wait_h"; my $QSUB_CMD = qsub_args(mert_memory()); +require "libcall.pl"; + # Default settings my $srcFile; my $refFiles; @@ -22,6 +24,7 @@ my $MAPINPUT = "$bin_dir/mr_vest_generate_mapper_input"; my $MAPPER = "$bin_dir/mr_vest_map"; my $REDUCER = "$bin_dir/mr_vest_reduce"; my $parallelize = "$bin_dir/parallelize.pl"; +my $libcall = "$bin_dir/libcall.pl"; my $sentserver = "$bin_dir/sentserver"; my $sentclient = "$bin_dir/sentclient"; my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm"; @@ -31,6 +34,7 @@ die "Can't find $MAPPER" unless -x $MAPPER; my $cdec = "$bin_dir/../decoder/cdec"; die "Can't find decoder in $cdec" unless -x $cdec; die "Can't find $parallelize" unless -x $parallelize; +die "Can't find $libcall" unless -e $libcall; my $decoder = $cdec; my $lines_per_mapper = 400; my $rand_directions = 15; @@ -124,7 +128,7 @@ sub enseg; sub print_help; my $nodelist; -my $host =`hostname`; chomp $host; +my $host =check_output("hostname"); chomp $host; my $bleu; my $interval_count = 0; my $logfile; @@ -142,7 +146,7 @@ unless ($dir){ $dir = "vest"; } unless ($dir =~ /^\//){ # convert relative path to absolute path - my $basedir = `pwd`; + my $basedir = check_output("pwd"); chomp $basedir; $dir = "$basedir/$dir"; } @@ -158,15 +162,18 @@ my @cleanupcmds = (); sub cleanup { print STDERR "Cleanup...\n"; - for my $pid (@childpids){ `kill $pid`; } - for my $cmd (@cleanupcmds){`$cmd`; } + for my $pid (@childpids){ unchecked_call("kill $pid"); } + for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); } exit 1; }; +# Always call cleanup, no matter how we exit +*CORE::GLOBAL::exit = + sub{ cleanup(); }; $SIG{INT} = "cleanup"; $SIG{TERM} = "cleanup"; $SIG{HUP} = "cleanup"; -my $decoderBase = `basename $decoder`; chomp $decoderBase; +my $decoderBase = check_output("basename $decoder"); chomp $decoderBase; my $newIniFile = "$dir/$decoderBase.ini"; my $inputFileName = "$dir/input"; my $user = $ENV{"USER"}; @@ -181,12 +188,12 @@ use File::Basename qw(basename); sub modbin { local $_; my $bindir=shift; - `mkdir -p $bindir`; + check_call("mkdir -p $bindir"); -d $bindir || die "couldn't make bindir $bindir"; for (@_) { my $src=$$_; $$_="$bindir/".basename($src); - `cp -p $src $$_`; + check_call("cp -p $src $$_"); die "cp $src $$_ failed: $!" unless $? == 0; } } @@ -203,7 +210,7 @@ if ($dryrun){ } else { -e $dir || mkdir $dir; mkdir "$dir/hgs"; - modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient) if $cpbin; + modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin; mkdir "$dir/scripts"; my $cmdfile="$dir/rerun-vest.sh"; open CMD,'>',$cmdfile; @@ -219,7 +226,7 @@ if ($dryrun){ print_help(); exit; } - `cp $initialWeights $dir/weights.0`; + check_call("cp $initialWeights $dir/weights.0"); die "Can't find weights.0" unless (-e "$dir/weights.0"); } write_config(*STDERR); @@ -227,7 +234,7 @@ if ($dryrun){ # Generate initial files and values -`cp $iniFile $newIniFile`; +check_call("cp $iniFile $newIniFile"); $iniFile = $newIniFile; my $newsrc = "$dir/dev.input"; @@ -259,12 +266,12 @@ while (1){ my $logdir="$dir/logs.$iteration"; my $decoderLog="$logdir/decoder.sentserver.log.$iteration"; my $scorerLog="$logdir/scorer.log.$iteration"; - `mkdir -p $logdir`; + check_call("mkdir -p $logdir"); #decode print STDERR "RUNNING DECODER AT "; - print STDERR `date`; + print STDERR unchecked_output("date"); my $im1 = $iteration - 1; my $weightsFile="$dir/weights.$im1"; my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs"; @@ -275,33 +282,28 @@ while (1){ if ($run_local) { $pcmd = "cat $srcFile |"; } elsif ($use_make) { - $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $decode_nodes --"; + # TODO: Throw error when decode_nodes is specified along with use_make + $pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $use_make --"; } else { $pcmd = "cat $srcFile | $parallelize $usefork -p $pmem -e $logdir -j $decode_nodes --"; } my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile"; print STDERR "COMMAND:\n$cmd\n"; - my $result = 0; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - print STDERR "ERROR: Parallel decoder returned non-zero exit code $result\n"; - die; - } - my $num_hgs = `ls $dir/hgs/*.gz | wc -l`; + check_bash_call($cmd); + my $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l"); print STDERR "NUMBER OF HGs: $num_hgs\n"; die "Dev set contains $devSize sentences! Decoder failure?\n" if ($devSize != $num_hgs); - my $dec_score = `cat $runFile | $SCORER $refs_comma_sep -l $metric`; + my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -l $metric"); chomp $dec_score; print STDERR "DECODER SCORE: $dec_score\n"; # save space - `gzip -f $runFile`; - `gzip -f $decoderLog`; + check_call("gzip -f $runFile"); + check_call("gzip -f $decoderLog"); # run optimizer print STDERR "RUNNING OPTIMIZER AT "; - print STDERR `date`; + print STDERR unchecked_output("date"); my $mergeLog="$logdir/prune-merge.log.$iteration"; my $score = 0; @@ -309,28 +311,18 @@ while (1){ my $inweights="$dir/weights.$im1"; for (my $opt_iter=1; $opt_iter<$optimization_iters; $opt_iter++) { print STDERR "\nGENERATE OPTIMIZATION STRATEGY (OPT-ITERATION $opt_iter/$optimization_iters)\n"; - print STDERR `date`; + print STDERR unchecked_output("date"); $icc++; my $nop=$noprimary?"--no_primary":""; my $targs=$oraclen ? "--decoder_translations='$runFile.gz' ".get_comma_sep_refs('-references',$refFiles):""; my $bwargs=$bleu_weight!=1 ? "--bleu_weight=$bleu_weight":""; $cmd="$MAPINPUT -w $inweights -r $dir/hgs $bwargs -s $devSize -d $rand_directions --max_similarity=$maxsim --oracle_directions=$oraclen --oracle_batch=$oracleb $targs $dirargs > $dir/agenda.$im1-$opt_iter"; print STDERR "COMMAND:\n$cmd\n"; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - die "ERROR: mapinput command returned non-zero exit code $result\n"; - } - - `mkdir -p $dir/splag.$im1`; + check_call($cmd); + check_call("mkdir -p $dir/splag.$im1"); $cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1-$opt_iter $dir/splag.$im1/mapinput."; print STDERR "COMMAND:\n$cmd\n"; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - print STDERR "ERROR: split command returned non-zero exit code $result\n"; - die; - } + check_call($cmd); opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!"; my @shards = grep { /^mapinput\./ } readdir(DIR); closedir DIR; @@ -360,11 +352,7 @@ while (1){ my $script = "$MAPPER -s $srcFile -l $metric $refs_comma_sep < $dir/splag.$im1/$shard | sort -t \$'\\t' -k 1 > $dir/splag.$im1/$mapoutput"; if ($run_local) { print STDERR "COMMAND:\n$script\n"; - $result = system($script); - unless ($result == 0){ - cleanup(); - die "ERROR: mapper returned non-zero exit code $result\n"; - } + check_bash_call($script); } elsif ($use_make) { my $script_file = "$dir/scripts/map.$shard"; open F, ">$script_file" or die "Can't write $script_file: $!"; @@ -384,13 +372,13 @@ while (1){ if ($first_shard) { print STDERR "$script\n"; $first_shard=0; } $nmappers++; - my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; - my $jobid = `$qcmd`; + my $qcmd = "QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file"; + my $jobid = check_output("$qcmd"); die "qsub failed: $!\nCMD was: $qcmd" unless $? == 0; chomp $jobid; $jobid =~ s/^(\d+)(.*?)$/\1/g; $jobid =~ s/^Your job (\d+) .*$/\1/; - push(@cleanupcmds, "`qdel $jobid 2> /dev/null`"); + push(@cleanupcmds, check_output("qdel $jobid 2> /dev/null")); print STDERR " $jobid"; if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } @@ -403,18 +391,14 @@ while (1){ close $mkfile; my $mcmd = "make -j $use_make -f $mkfilename"; print STDERR "\nExecuting: $mcmd\n"; - $result = system($mcmd); - unless ($result == 0){ - cleanup(); - die "ERROR: make command returned non-zero exit code $result\n"; - } + check_call($mcmd); } else { print STDERR "\nLaunched $nmappers mappers.\n"; sleep 8; print STDERR "Waiting for mappers to complete...\n"; while ($nmappers > 0) { sleep 5; - my @livejobs = grep(/$joblist/, split(/\n/, `qstat | grep -v ' C '`)); + my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat | grep -v ' C '"))); $nmappers = scalar @livejobs; } print STDERR "All mappers complete.\n"; @@ -430,16 +414,12 @@ while (1){ } print STDERR "Results for $tol/$til lines\n"; print STDERR "\nSORTING AND RUNNING VEST REDUCER\n"; - print STDERR `date`; + print STDERR unchecked_output("date"); $cmd="sort -t \$'\\t' -k 1 @mapoutputs | $REDUCER -l $metric > $dir/redoutput.$im1"; print STDERR "COMMAND:\n$cmd\n"; - $result = system($cmd); - unless ($result == 0){ - cleanup(); - die "ERROR: reducer command returned non-zero exit code $result\n"; - } + check_bash_call($cmd); $cmd="sort -nk3 $DIR_FLAG '-t|' $dir/redoutput.$im1 | head -1"; - my $best=`$cmd`; chomp $best; + my $best=check_bash_output("$cmd"); chomp $best; print STDERR "$best\n"; my ($oa, $x, $xscore) = split /\|/, $best; $score = $xscore; @@ -472,11 +452,11 @@ while (1){ my $v = ($ori{$k} + $axi{$k} * $x) / $norm; print W "$k $v\n"; } - `rm -rf $dir/splag.$im1`; + check_call("rm -rf $dir/splag.$im1"); $inweights = $finalFile; } $lastWeightsFile = "$dir/weights.$iteration"; - `cp $inweights $lastWeightsFile`; + check_call("cp $inweights $lastWeightsFile"); if ($icc < 2) { print STDERR "\nREACHED STOPPING CRITERION: score change too little\n"; last; @@ -520,7 +500,7 @@ sub get_lines { sub get_comma_sep_refs { my ($r,$p) = @_; - my $o = `echo $p`; + my $o = check_output("echo $p"); chomp $o; my @files = split /\s+/, $o; return "-$r " . join(" -$r ", @files); @@ -607,7 +587,7 @@ sub enseg { sub print_help { - my $executable = `basename $0`; chomp $executable; + my $executable = check_output("basename $0"); chomp $executable; print << "Help"; Usage: $executable [options] diff --git a/vest/parallelize.pl b/vest/parallelize.pl index cb5406ec..47b77c79 100755 --- a/vest/parallelize.pl +++ b/vest/parallelize.pl @@ -28,6 +28,12 @@ use IPC::Open2; use strict; use POSIX ":sys_wait_h"; +use File::Basename; +my $myDir = dirname(__FILE__); +print STDERR __FILE__." -> $myDir\n"; +push(@INC, $myDir); +require "libcall.pl"; + my $tailn=5; # +0 = concatenate all the client logs. 5 = last 5 lines my $recycle_clients; # spawn new clients when previous ones terminate my $stay_alive; # dont let server die when having zero clients @@ -76,7 +82,7 @@ sub preview_files { my @f=grep { ! ($skipempty && -z $_) } @$l; my $fn=join(' ',map {escape_shell($_)} @f); my $cmd="tail -n $n $fn"; - `$cmd`.($footer?"\nNONEMPTY FILES:\n$fn\n":""); + check_output("$cmd").($footer?"\nNONEMPTY FILES:\n$fn\n":""); } sub prefix_dirname($) { #like `dirname but if ends in / then return the whole thing @@ -105,7 +111,7 @@ sub extend_path($$;$$) { $dir=prefix_dirname($base); } my @cmd=("/bin/mkdir","-p",$dir); - system(@cmd) if $mkdir; + check_call(@cmd) if $mkdir; } return $base.$ext; } @@ -142,7 +148,7 @@ my $prog=shift; if ($no_which) { $cmd=$prog; } else { - $cmd=`which $prog`; + $cmd=check_output("which $prog"); chomp $cmd; die "$prog not found - $cmd" unless $cmd; } @@ -156,7 +162,7 @@ my $cdcmd=$no_cd ? '' : ("cd ".escape_shell($abscwd)."\n"); my $executable = $cmd; $executable =~ s/^\s*(\S+)($|\s.*)/$1/; -$executable=`basename $executable`; +$executable=check_output("basename $executable"); chomp $executable; @@ -192,10 +198,10 @@ sub launch_job_on_node; # vars -my $mydir = `dirname $0`; chomp $mydir; +my $mydir = check_output("dirname $0"); chomp $mydir; my $sentserver = "$mydir/sentserver"; my $sentclient = "$mydir/sentclient"; -my $host = `hostname`; +my $host = check_output("hostname"); chomp $host; @@ -205,7 +211,7 @@ my $port = 50300+int(rand($randp)); my $endp=$port+$tryp; sub listening_port_lines { my $quiet=$verbose?'':'2>/dev/null'; - `netstat -a -n $quiet | grep LISTENING | grep -i tcp` + return unchecked_output("netstat -a -n $quiet | grep LISTENING | grep -i tcp"); } my $netstat=&listening_port_lines; @@ -270,17 +276,14 @@ $cdcmd$sentclient $host:$port:$key $cmd # my $todo = "$sentserver -k $key $multiflag $port "; my $todo = "$sentserver -k $key $multiflag $port $stay_alive_flag "; if ($verbose){ print STDERR "Running: $todo\n"; } - my $rc = system($todo); - if ($rc){ - die "Error: sentserver returned code $rc\n"; - } + check_call($todo); } sub numof_live_jobs { if ($use_fork) { die "not implemented"; } else { - my @livejobs = grep(/$joblist/, split(/\n/, `qstat`)); + my @livejobs = grep(/$joblist/, split(/\n/, check_output("qstat"))); return ($#livejobs + 1); } } @@ -320,7 +323,7 @@ sub launch_job { } if ($joblist == "") { $joblist = $jobid; } else {$joblist = $joblist . "\|" . $jobid; } - my $cleanfn="`qdel $jobid 2> /dev/null`"; + my $cleanfn=check_output("qdel $jobid 2> /dev/null"); push(@cleanup_cmds, $cleanfn); } close QOUT; @@ -345,7 +348,7 @@ sub launch_job_fork { close $fh; my $todo = "/bin/sh $scr_name 1> $outfile 2> $errorfile"; print STDERR "EXEC: $todo\n"; - my $out = `$todo`; + my $out = check_output("$todo"); print STDERR "RES: $out\n"; unlink $scr_name or warn "Failed to remove $scr_name"; exit 0; @@ -377,7 +380,7 @@ sub cleanup { sub print_help { - my $name = `basename $0`; chomp $name; + my $name = check_output("basename $0"); chomp $name; print << "Help"; usage: $name [options] -- cgit v1.2.3 From 60ec6fc57dbef221e6c6d75b1fadbb4cce098ddf Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 17:13:25 -0500 Subject: Unsuck dist-vest --- vest/libcall.pl | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 vest/libcall.pl diff --git a/vest/libcall.pl b/vest/libcall.pl new file mode 100644 index 00000000..c7d0f128 --- /dev/null +++ b/vest/libcall.pl @@ -0,0 +1,71 @@ +use IPC::Open3; +use Symbol qw(gensym); + +$DUMMY_STDERR = gensym(); +$DUMMY_STDIN = gensym(); + +# Run the command and ignore failures +sub unchecked_call { + system("@_") +} + +# Run the command and return its output, if any ignoring failures +sub unchecked_output { + return `@_` +} + +# WARNING: Do not use this for commands that will return large amounts +# of stdout or stderr -- they might block indefinitely +sub check_output { + print STDERR "Executing and gathering output: @_\n"; + + my $pid = open3($DUMMY_STDIN, \*PH, $DUMMY_STDERR, @_); + my $proc_output = ""; + while( ) { + $proc_output .= $_; + } + waitpid($pid, 0); + # TODO: Grab signal that the process died from + my $child_exit_status = $? >> 8; + if($child_exit_status == 0) { + return $proc_output; + } else { + print STDERR "ERROR: Execution of @_ failed.\n"; + exit(1); + } +} + +# Based on Moses' safesystem sub +sub check_call { + print STDERR "Executing: @_\n"; + system(@_); + my $exitcode = $? >> 8; + if($exitcode == 0) { + return 0; + } elsif ($? == -1) { + print STDERR "ERROR: Failed to execute: @_\n $!\n"; + exit(1); + + } elsif ($? & 127) { + printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n", + ($? & 127), ($? & 128) ? 'with' : 'without'; + exit(1); + + } else { + print STDERR "Failed with exit code: $exitcode\n" if $exitcode; + exit($exitcode); + } +} + +sub check_bash_call { + my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); + check_call(@args); +} + +sub check_bash_output { + my @args = ( "bash", "-auxeo", "pipefail", "-c", "@_"); + return check_output(@args); +} + +# perl module weirdness... +return 1; -- cgit v1.2.3 From e7dfcff76b8d53775f753c2367776c5348bd73b5 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Mar 2011 20:02:07 -0500 Subject: experimental nonlinear feature --- training/augment_grammar.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/training/augment_grammar.cc b/training/augment_grammar.cc index 19120d00..9ad03b6c 100644 --- a/training/augment_grammar.cc +++ b/training/augment_grammar.cc @@ -36,6 +36,7 @@ bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { ("source_lm,l",po::value(),"Source language LM (KLM)") ("collapse_weights,w",po::value(), "Collapse weights into a single feature X using the coefficients from this weights file") ("add_shape_types,s", "Add rule shape types") + ("extra_lex_feature,x", "Experimental nonlinear lexical weighting feature") ("replace_files,r", "Replace files with transformed variants (requires loading full grammar into memory)") ("grammar,g", po::value >(), "Input (also output) grammar file(s)"); po::options_description clo("Command line options"); @@ -85,6 +86,7 @@ template float Score(const vector& str, const Model &model return total; } +bool extra_feature; int kSrcLM; vector col_weights; bool gather_rules; @@ -94,9 +96,15 @@ static void RuleHelper(const TRulePtr& new_rule, const unsigned int ctf_level, c static const int kSrcLM = FD::Convert("SrcLM"); static const int kPC = FD::Convert("PC"); static const int kX = FD::Convert("X"); + static const int kPhraseModel2 = FD::Convert("PhraseModel_1"); + static const int kNewLex = FD::Convert("NewLex"); TRulePtr r; r.reset(new TRule(*new_rule)); if (ngram) r->scores_.set_value(kSrcLM, Score(r->f_, *ngram)); r->scores_.set_value(kPC, 1.0); + if (extra_feature) { + float v = r->scores_.value(kPhraseModel2); + r->scores_.set_value(kNewLex, v*(v+1)); + } if (col_weights.size()) { double score = r->scores_.dot(col_weights); r->scores_.clear(); @@ -122,6 +130,7 @@ int main(int argc, char** argv) { cerr << "Loaded " << (int)ngram->Order() << "-gram KenLM (MapSize=" << word_map.size() << ")\n"; cerr << " = " << kSOS << endl; } else { ngram = NULL; } + extra_feature = conf.count("extra_lex_feature") > 0; if (conf.count("collapse_weights")) { Weights w; w.InitFromFile(conf["collapse_weights"].as()); -- cgit v1.2.3 From 8056672a26e44fa0fb9be93218597e31d63847ac Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 20:08:05 -0500 Subject: whitespace diff --- Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index 0e7b3885..a808c211 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,5 +5,5 @@ SUBDIRS = utils mteval klm/util klm/lm decoder phrasinator training vest extools AUTOMAKE_OPTIONS = foreign ACLOCAL_AMFLAGS = -I m4 -AM_CPPFLAGS = -D_GLIBCXX_PARALLEL +AM_CPPFLAGS = -D_GLIBCXX_PARALLEL -- cgit v1.2.3 From f592783059ffcca9dbc32401b0f17d7b69a7ded5 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 20:35:35 -0500 Subject: scons readme --- README.scons | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 README.scons diff --git a/README.scons b/README.scons new file mode 100644 index 00000000..eb56a666 --- /dev/null +++ b/README.scons @@ -0,0 +1,21 @@ +Building cdec with the scons build system: + +While we don't build all components of cdec under scons (yet), +scons makes things much simpler and reliable. Currently, +building cdec with scons is experimental. + +Step-by-step: +1) Obtain a Python V2.4 or later (you probably already have this) +2) Download and install Scons v2.0+ from http://www.scons.org/download.php + $ wget http://prdownloads.sourceforge.net/scons/scons-2.0.1.tar.gz + $ tar -xvzf scons-2.0.1.tar.gz + $ cd scons-2.0.1 + $ python setup.py build + $ sudo setup.py install #(if you have root) + $ setup.py install --home=~/prefix #(to install to a user-space prefix instead) + +3) Run scons. Some examples: + $ scons #(just build the decoder) + $ scons -j16 #(build using 16 cores) + $ scons -h #(get help) + $ scons --with-boost=~/prefix #(use ~/prefix as the boost directory) -- cgit v1.2.3 From 474fb566124aadf2de1cc73d3788386bd584eb15 Mon Sep 17 00:00:00 2001 From: Jonathan Clark Date: Thu, 10 Mar 2011 20:38:43 -0500 Subject: Pretty formatting --- SConstruct | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/SConstruct b/SConstruct index e61af175..1a7885bc 100644 --- a/SConstruct +++ b/SConstruct @@ -1,3 +1,7 @@ +#!/usr/bin/python + +# EXPERIMENTAL and HACKY version of cdec build in scons + AddOption('--prefix', dest='prefix', type='string', nargs=1, action='store', metavar='DIR', help='installation prefix') AddOption('--with-boost', dest='boost', type='string', nargs=1, action='store', metavar='DIR', -- cgit v1.2.3 From cb8e10b896d4d19a3d7c9b997a74f9bd39a5c714 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 10 Mar 2011 21:46:56 -0500 Subject: starcluster env tweak --- environment/LocalConfig.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/environment/LocalConfig.pm b/environment/LocalConfig.pm index 62ee2dd3..d136610d 100644 --- a/environment/LocalConfig.pm +++ b/environment/LocalConfig.pm @@ -71,6 +71,7 @@ sub environment_name { sub qsub_args { my $mem = shift @_; die "qsub_args requires a memory amount as a parameter, e.g. 4G" unless $mem; + return 'qsub -V -cwd' if environment_name() eq 'StarCluster'; my $mf = $CONFIG{'QSubMemFlag'} or die "QSubMemFlag not set for $senvironment_name"; my $cmd = "qsub -S /bin/bash ${mf}${mem}"; if ($CONFIG{'QSubQueue'}) { $cmd .= ' ' . $CONFIG{'QSubQueue'}; } -- cgit v1.2.3