From 7640d85b92f61016e0712825920c6a259329d79b Mon Sep 17 00:00:00 2001
From: Chris Dyer <cdyer@cs.cmu.edu>
Date: Sun, 18 Nov 2012 11:31:21 -0500
Subject: more consistent naming, interface, fix compile error

---
 mteval/meteor_jar.cc.in                   |   3 +
 pro-train/Makefile.am                     |  11 -
 pro-train/README.shared-mem               |   9 -
 pro-train/dist-pro.pl                     | 671 ------------------------------
 pro-train/mr_pro_generate_mapper_input.pl |  18 -
 pro-train/mr_pro_map.cc                   | 201 ---------
 pro-train/mr_pro_reduce.cc                | 286 -------------
 pro/Makefile.am                           |  11 +
 pro/README.shared-mem                     |   9 +
 pro/mr_pro_generate_mapper_input.pl       |  18 +
 pro/mr_pro_map.cc                         | 201 +++++++++
 pro/mr_pro_reduce.cc                      | 286 +++++++++++++
 pro/pro.pl                                | 555 ++++++++++++++++++++++++
 13 files changed, 1083 insertions(+), 1196 deletions(-)
 create mode 100644 mteval/meteor_jar.cc.in
 delete mode 100644 pro-train/Makefile.am
 delete mode 100644 pro-train/README.shared-mem
 delete mode 100755 pro-train/dist-pro.pl
 delete mode 100755 pro-train/mr_pro_generate_mapper_input.pl
 delete mode 100644 pro-train/mr_pro_map.cc
 delete mode 100644 pro-train/mr_pro_reduce.cc
 create mode 100644 pro/Makefile.am
 create mode 100644 pro/README.shared-mem
 create mode 100755 pro/mr_pro_generate_mapper_input.pl
 create mode 100644 pro/mr_pro_map.cc
 create mode 100644 pro/mr_pro_reduce.cc
 create mode 100755 pro/pro.pl

diff --git a/mteval/meteor_jar.cc.in b/mteval/meteor_jar.cc.in
new file mode 100644
index 00000000..fe45a72a
--- /dev/null
+++ b/mteval/meteor_jar.cc.in
@@ -0,0 +1,3 @@
+
+const char* meteor_jar_path = "@METEOR_JAR@";
+
diff --git a/pro-train/Makefile.am b/pro-train/Makefile.am
deleted file mode 100644
index 1e9d46b0..00000000
--- a/pro-train/Makefile.am
+++ /dev/null
@@ -1,11 +0,0 @@
-bin_PROGRAMS = \
-  mr_pro_map \
-  mr_pro_reduce
-
-mr_pro_map_SOURCES = mr_pro_map.cc
-mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
-
-mr_pro_reduce_SOURCES = mr_pro_reduce.cc
-mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
-
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training
diff --git a/pro-train/README.shared-mem b/pro-train/README.shared-mem
deleted file mode 100644
index 7728efc0..00000000
--- a/pro-train/README.shared-mem
+++ /dev/null
@@ -1,9 +0,0 @@
-If you want to run dist-vest.pl on a very large shared memory machine, do the
-following:
-
-  ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini
-
-This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the
-decoder must load grammars, language models, etc., J should be smaller than I, but this will depend
-on the system you are running on and the complexity of the models used for decoding.
-
diff --git a/pro-train/dist-pro.pl b/pro-train/dist-pro.pl
deleted file mode 100755
index 31258fa6..00000000
--- a/pro-train/dist-pro.pl
+++ /dev/null
@@ -1,671 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-my @ORIG_ARGV=@ARGV;
-use Cwd qw(getcwd);
-my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
-
-# Skip local config (used for distributing jobs) if we're running in local-only mode
-use LocalConfig;
-use Getopt::Long;
-use IPC::Open2;
-use POSIX ":sys_wait_h";
-my $QSUB_CMD = qsub_args(mert_memory());
-my $default_jobs = env_default_jobs();
-
-my $VEST_DIR="$SCRIPT_DIR/../dpmert";
-require "$VEST_DIR/libcall.pl";
-
-# Default settings
-my $srcFile;
-my $refFiles;
-my $bin_dir = $SCRIPT_DIR;
-die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
-my $FAST_SCORE="$bin_dir/../mteval/fast_score";
-die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
-my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl";
-my $MAPPER = "$bin_dir/mr_pro_map";
-my $REDUCER = "$bin_dir/mr_pro_reduce";
-my $parallelize = "$VEST_DIR/parallelize.pl";
-my $libcall = "$VEST_DIR/libcall.pl";
-my $sentserver = "$VEST_DIR/sentserver";
-my $sentclient = "$VEST_DIR/sentclient";
-my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
-
-my $SCORER = $FAST_SCORE;
-die "Can't find $MAPPER" unless -x $MAPPER;
-my $cdec = "$bin_dir/../decoder/cdec";
-die "Can't find decoder in $cdec" unless -x $cdec;
-die "Can't find $parallelize" unless -x $parallelize;
-die "Can't find $libcall" unless -e $libcall;
-my $decoder = $cdec;
-my $lines_per_mapper = 30;
-my $iteration = 1;
-my $best_weights;
-my $psi = 1;
-my $default_max_iter = 30;
-my $max_iterations = $default_max_iter;
-my $jobs = $default_jobs;   # number of decode nodes
-my $pmem = "4g";
-my $disable_clean = 0;
-my %seen_weights;
-my $help = 0;
-my $epsilon = 0.0001;
-my $dryrun = 0;
-my $last_score = -10000000;
-my $metric = "ibm_bleu";
-my $dir;
-my $iniFile;
-my $weights;
-my $use_make = 1;  # use make to parallelize
-my $useqsub = 0;
-my $initial_weights;
-my $pass_suffix = '';
-my $cpbin=1;
-
-# regularization strength
-my $tune_regularizer = 0;
-my $reg = 500;
-my $reg_previous = 5000;
-
-# Process command-line options
-Getopt::Long::Configure("no_auto_abbrev");
-if (GetOptions(
-	"jobs=i" => \$jobs,
-	"dont-clean" => \$disable_clean,
-	"pass-suffix=s" => \$pass_suffix,
-        "qsub" => \$useqsub,
-	"dry-run" => \$dryrun,
-	"epsilon=s" => \$epsilon,
-	"interpolate-with-weights=f" => \$psi,
-	"help" => \$help,
-        "weights=s" => \$initial_weights,
-	"tune-regularizer" => \$tune_regularizer,
-	"reg=f" => \$reg,
-	"reg-previous=f" => \$reg_previous,
-	"use-make=i" => \$use_make,
-	"max-iterations=i" => \$max_iterations,
-	"pmem=s" => \$pmem,
-        "cpbin!" => \$cpbin,
-	"ref-files=s" => \$refFiles,
-	"metric=s" => \$metric,
-	"source-file=s" => \$srcFile,
-	"workdir=s" => \$dir,
-) == 0 || @ARGV!=1 || $help) {
-	print_help();
-	exit;
-}
-
-die "--tune-regularizer is no longer supported with --reg-previous and --reg. Please tune manually.\n" if $tune_regularizer;
-
-if ($useqsub) {
-  $use_make = 0;
-  die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
-}
-
-my @missing_args = ();
-if (!defined $srcFile) { push @missing_args, "--source-file"; }
-if (!defined $refFiles) { push @missing_args, "--ref-files"; }
-if (!defined $initial_weights) { push @missing_args, "--weights"; }
-die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
-
-if ($metric =~ /^(combi|ter)$/i) {
-  $lines_per_mapper = 5;
-}
-
-($iniFile) = @ARGV;
-
-
-sub write_config;
-sub enseg;
-sub print_help;
-
-my $nodelist;
-my $host =check_output("hostname"); chomp $host;
-my $bleu;
-my $interval_count = 0;
-my $logfile;
-my $projected_score;
-
-# used in sorting scores
-my $DIR_FLAG = '-r';
-if ($metric =~ /^ter$|^aer$/i) {
-  $DIR_FLAG = '';
-}
-
-my $refs_comma_sep = get_comma_sep_refs('r',$refFiles);
-
-unless ($dir){
-	$dir = "protrain";
-}
-unless ($dir =~ /^\//){  # convert relative path to absolute path
-	my $basedir = check_output("pwd");
-	chomp $basedir;
-	$dir = "$basedir/$dir";
-}
-
-
-# Initializations and helper functions
-srand;
-
-my @childpids = ();
-my @cleanupcmds = ();
-
-sub cleanup {
-	print STDERR "Cleanup...\n";
-	for my $pid (@childpids){ unchecked_call("kill $pid"); }
-	for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); }
-	exit 1;
-};
-# Always call cleanup, no matter how we exit
-*CORE::GLOBAL::exit = 
-    sub{ cleanup(); }; 
-$SIG{INT} = "cleanup";
-$SIG{TERM} = "cleanup";
-$SIG{HUP} = "cleanup";
-
-my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
-my $newIniFile = "$dir/$decoderBase.ini";
-my $inputFileName = "$dir/input";
-my $user = $ENV{"USER"};
-
-
-# process ini file
--e $iniFile || die "Error: could not open $iniFile for reading\n";
-open(INI, $iniFile);
-
-use File::Basename qw(basename);
-#pass bindir, refs to vars holding bin
-sub modbin {
-    local $_;
-    my $bindir=shift;
-    check_call("mkdir -p $bindir");
-    -d $bindir || die "couldn't make bindir $bindir";
-    for (@_) {
-        my $src=$$_;
-        $$_="$bindir/".basename($src);
-        check_call("cp -p $src $$_");
-    }
-}
-sub dirsize {
-    opendir ISEMPTY,$_[0];
-    return scalar(readdir(ISEMPTY))-1;
-}
-my @allweights;
-if ($dryrun){
-	write_config(*STDERR);
-	exit 0;
-} else {
-	if (-e $dir && dirsize($dir)>1 && -e "$dir/hgs" ){ # allow preexisting logfile, binaries, but not dist-pro.pl outputs
-	  die "ERROR: working dir $dir already exists\n\n";
-	} else {
-		-e $dir || mkdir $dir;
-		mkdir "$dir/hgs";
-        modbin("$dir/bin",\$LocalConfig,\$cdec,\$SCORER,\$MAPINPUT,\$MAPPER,\$REDUCER,\$parallelize,\$sentserver,\$sentclient,\$libcall) if $cpbin;
-    mkdir "$dir/scripts";
-        my $cmdfile="$dir/rerun-pro.sh";
-        open CMD,'>',$cmdfile;
-        print CMD "cd ",&getcwd,"\n";
-#        print CMD &escaped_cmdline,"\n"; #buggy - last arg is quoted.
-        my $cline=&cmdline."\n";
-        print CMD $cline;
-        close CMD;
-        print STDERR $cline;
-        chmod(0755,$cmdfile);
-	check_call("cp $initial_weights $dir/weights.0");
-	die "Can't find weights.0" unless (-e "$dir/weights.0");
-	}
-	write_config(*STDERR);
-}
-
-
-# Generate initial files and values
-check_call("cp $iniFile $newIniFile");
-$iniFile = $newIniFile;
-
-my $newsrc = "$dir/dev.input";
-enseg($srcFile, $newsrc);
-$srcFile = $newsrc;
-my $devSize = 0;
-open F, "<$srcFile" or die "Can't read $srcFile: $!";
-while(<F>) { $devSize++; }
-close F;
-
-unless($best_weights){ $best_weights = $weights; }
-unless($projected_score){ $projected_score = 0.0; }
-$seen_weights{$weights} = 1;
-
-my $random_seed = int(time / 1000);
-my $lastWeightsFile;
-my $lastPScore = 0;
-# main optimization loop
-while (1){
-	print STDERR "\n\nITERATION $iteration\n==========\n";
-
-	if ($iteration > $max_iterations){
-		print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n";
-		last;
-	}
-	# iteration-specific files
-	my $runFile="$dir/run.raw.$iteration";
-	my $onebestFile="$dir/1best.$iteration";
-	my $logdir="$dir/logs.$iteration";
-	my $decoderLog="$logdir/decoder.sentserver.log.$iteration";
-	my $scorerLog="$logdir/scorer.log.$iteration";
-	check_call("mkdir -p $logdir");
-
-
-	#decode
-	print STDERR "RUNNING DECODER AT ";
-	print STDERR unchecked_output("date");
-	my $im1 = $iteration - 1;
-	my $weightsFile="$dir/weights.$im1";
-        push @allweights, "-w $dir/weights.$im1";
-        `rm -f $dir/hgs/*.gz`;
-	my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
-	my $pcmd;
-	if ($use_make) {
-		$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
-	} else {
-		$pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
-	}
-	my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
-	print STDERR "COMMAND:\n$cmd\n";
-	check_bash_call($cmd);
-        my $num_hgs;
-        my $num_topbest;
-        my $retries = 0;
-	while($retries < 5) {
-	    $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l");
-	    $num_topbest = check_output("wc -l < $runFile");
-	    print STDERR "NUMBER OF HGs: $num_hgs\n";
-	    print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
-	    if($devSize == $num_hgs && $devSize == $num_topbest) {
-		last;
-	    } else {
-		print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n";
-		sleep(3);
-	    }
-	    $retries++;
-	}
-	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
-	my $dec_score = check_output("cat $runFile | $SCORER $refs_comma_sep -m $metric");
-	chomp $dec_score;
-	print STDERR "DECODER SCORE: $dec_score\n";
-
-	# save space
-	check_call("gzip -f $runFile");
-	check_call("gzip -f $decoderLog");
-
-	# run optimizer
-	print STDERR "RUNNING OPTIMIZER AT ";
-	print STDERR unchecked_output("date");
-	print STDERR " - GENERATE TRAINING EXEMPLARS\n";
-	my $mergeLog="$logdir/prune-merge.log.$iteration";
-
-	my $score = 0;
-	my $icc = 0;
-	my $inweights="$dir/weights.$im1";
-	$cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1";
-	print STDERR "COMMAND:\n$cmd\n";
-	check_call($cmd);
-	check_call("mkdir -p $dir/splag.$im1");
-	$cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput.";
-	print STDERR "COMMAND:\n$cmd\n";
-	check_call($cmd);
-	opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!";
-	my @shards = grep { /^mapinput\./ } readdir(DIR);
-	closedir DIR;
-	die "No shards!" unless scalar @shards > 0;
-	my $joblist = "";
-	my $nmappers = 0;
-	@cleanupcmds = ();
-	my %o2i = ();
-	my $first_shard = 1;
-	my $mkfile; # only used with makefiles
-	my $mkfilename;
-	if ($use_make) {
-		$mkfilename = "$dir/splag.$im1/domap.mk";
-		open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!";
-		print $mkfile "all: $dir/splag.$im1/map.done\n\n";
-	}
-	my @mkouts = ();  # only used with makefiles
-	my @mapoutputs = ();
-	for my $shard (@shards) {
-		my $mapoutput = $shard;
-		my $client_name = $shard;
-		$client_name =~ s/mapinput.//;
-		$client_name = "pro.$client_name";
-		$mapoutput =~ s/mapinput/mapoutput/;
-		push @mapoutputs, "$dir/splag.$im1/$mapoutput";
-		$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
-		my $script = "$MAPPER -s $srcFile -m $metric $refs_comma_sep -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput";
-		if ($use_make) {
-			my $script_file = "$dir/scripts/map.$shard";
-			open F, ">$script_file" or die "Can't write $script_file: $!";
-			print F "#!/bin/bash\n";
-			print F "$script\n";
-			close F;
-			my $output = "$dir/splag.$im1/$mapoutput";
-			push @mkouts, $output;
-			chmod(0755, $script_file) or die "Can't chmod $script_file: $!";
-			if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
-			print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n";
-		} else {
-			my $script_file = "$dir/scripts/map.$shard";
-			open F, ">$script_file" or die "Can't write $script_file: $!";
-			print F "$script\n";
-			close F;
-			if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
-
-			$nmappers++;
-			my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
-			my $jobid = check_output("$qcmd");
-			chomp $jobid;
-			$jobid =~ s/^(\d+)(.*?)$/\1/g;
-			$jobid =~ s/^Your job (\d+) .*$/\1/;
-		 	push(@cleanupcmds, "qdel $jobid 2> /dev/null");
-			print STDERR " $jobid";
-			if ($joblist == "") { $joblist = $jobid; }
-			else {$joblist = $joblist . "\|" . $jobid; }
-		}
-	}
-	my @dev_outs = ();
-	my @devtest_outs = ();
-	if ($tune_regularizer) {
-		for (my $i = 0; $i < scalar @mapoutputs; $i++) {
-			if ($i % 3 == 1) {
-				push @devtest_outs, $mapoutputs[$i];
-			} else {
-				push @dev_outs, $mapoutputs[$i];
-			}
-		}
-		if (scalar @devtest_outs == 0) {
-			die "Not enough training instances for regularization tuning! Rerun without --tune-regularizer\n";
-		}
-	} else {
-		@dev_outs = @mapoutputs;
-	}
-	if ($use_make) {
-		print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n";
-		close $mkfile;
-		my $mcmd = "make -j $jobs -f $mkfilename";
-		print STDERR "\nExecuting: $mcmd\n";
-		check_call($mcmd);
-	} else {
-		print STDERR "\nLaunched $nmappers mappers.\n";
-      		sleep 8;
-		print STDERR "Waiting for mappers to complete...\n";
-		while ($nmappers > 0) {
-		  sleep 5;
-		  my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '")));
-		  $nmappers = scalar @livejobs;
-		}
-		print STDERR "All mappers complete.\n";
-	}
-	my $tol = 0;
-	my $til = 0;
-	my $dev_test_file = "$dir/splag.$im1/devtest.gz";
-	if ($tune_regularizer) {
-		my $cmd = "cat @devtest_outs | gzip > $dev_test_file";
-		check_bash_call($cmd);
-		die "Can't find file $dev_test_file" unless -f $dev_test_file;
-	}
-        #print STDERR "MO: @mapoutputs\n";
-	for my $mo (@mapoutputs) {
-		#my $olines = get_lines($mo);
-		#my $ilines = get_lines($o2i{$mo});
-		#die "$mo: no training instances generated!" if $olines == 0;
-	}
-	print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n";
-	print STDERR unchecked_output("date");
-	$cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi";
-	if ($tune_regularizer) {
-		$cmd .= " -T -t $dev_test_file";
-	}
-        $cmd .= " > $dir/weights.$iteration";
-	print STDERR "COMMAND:\n$cmd\n";
-	check_bash_call($cmd);
-	$lastWeightsFile = "$dir/weights.$iteration";
-	if ($tune_regularizer) {
-		open W, "<$lastWeightsFile" or die "Can't read $lastWeightsFile: $!";
-		my $line = <W>;
-		close W;
-		my ($sharp, $label, $nreg) = split /\s|=/, $line;
-		print STDERR "REGULARIZATION STRENGTH ($label) IS $nreg\n";
-		$reg = $nreg;
-		# only tune regularizer on first iteration?
-		$tune_regularizer = 0;
-	}
-	$lastPScore = $score;
-	$iteration++;
-	print STDERR "\n==========\n";
-}
-
-print STDERR "\nFINAL WEIGHTS: $lastWeightsFile\n(Use -w <this file> with the decoder)\n\n";
-
-print STDOUT "$lastWeightsFile\n";
-
-exit 0;
-
-sub get_lines {
-  my $fn = shift @_;
-  open FL, "<$fn" or die "Couldn't read $fn: $!";
-  my $lc = 0;
-  while(<FL>) { $lc++; }
-  return $lc;
-}
-
-sub get_comma_sep_refs {
-  my ($r,$p) = @_;
-  my $o = check_output("echo $p");
-  chomp $o;
-  my @files = split /\s+/, $o;
-  return "-$r " . join(" -$r ", @files);
-}
-
-sub read_weights_file {
-  my ($file) = @_;
-  open F, "<$file" or die "Couldn't read $file: $!";
-  my @r = ();
-  my $pm = -1;
-  while(<F>) {
-    next if /^#/;
-    next if /^\s*$/;
-    chomp;
-    if (/^(.+)\s+(.+)$/) {
-      my $m = $1;
-      my $w = $2;
-      die "Weights out of order: $m <= $pm" unless $m > $pm;
-      push @r, $w;
-    } else {
-      warn "Unexpected feature name in weight file: $_";
-    }
-  }
-  close F;
-  return join ' ', @r;
-}
-
-# subs
-sub write_config {
-	my $fh = shift;
-	my $cleanup = "yes";
-	if ($disable_clean) {$cleanup = "no";}
-
-	print $fh "\n";
-	print $fh "DECODER:          $decoder\n";
-	print $fh "INI FILE:         $iniFile\n";
-	print $fh "WORKING DIR:      $dir\n";
-	print $fh "SOURCE (DEV):     $srcFile\n";
-	print $fh "REFS (DEV):       $refFiles\n";
-	print $fh "EVAL METRIC:      $metric\n";
-	print $fh "MAX ITERATIONS:   $max_iterations\n";
-	print $fh "JOBS:             $jobs\n";
-	print $fh "HEAD NODE:        $host\n";
-	print $fh "PMEM (DECODING):  $pmem\n";
-	print $fh "CLEANUP:          $cleanup\n";
-}
-
-sub update_weights_file {
-  my ($neww, $rfn, $rpts) = @_;
-  my @feats = @$rfn;
-  my @pts = @$rpts;
-  my $num_feats = scalar @feats;
-  my $num_pts = scalar @pts;
-  die "$num_feats (num_feats) != $num_pts (num_pts)" unless $num_feats == $num_pts;
-  open G, ">$neww" or die;
-  for (my $i = 0; $i < $num_feats; $i++) {
-    my $f = $feats[$i];
-    my $lambda = $pts[$i];
-    print G "$f $lambda\n";
-  }
-  close G;
-}
-
-sub enseg {
-	my $src = shift;
-	my $newsrc = shift;
-	open(SRC, $src);
-	open(NEWSRC, ">$newsrc");
-	my $i=0;
-	while (my $line=<SRC>){
-		chomp $line;
-		if ($line =~ /^\s*<seg/i) {
-		    if($line =~ /id="[0-9]+"/) {
-			print NEWSRC "$line\n";
-		    } else {
-			die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
-		    }
-		} else {
-			print NEWSRC "<seg id=\"$i\">$line</seg>\n";
-		}
-		$i++;
-	}
-	close SRC;
-	close NEWSRC;
-	die "Empty dev set!" if ($i == 0);
-}
-
-sub print_help {
-
-	my $executable = check_output("basename $0"); chomp $executable;
-	print << "Help";
-
-Usage: $executable [options] <ini file>
-
-	$executable [options] <ini file>
-		Runs a complete PRO optimization using the ini file specified.
-
-Required:
-
-	--ref-files <files>
-		Dev set ref files.  This option takes only a single string argument.
-		To use multiple files (including file globbing), this argument should
-		be quoted.
-
-	--source-file <file>
-		Dev set source file.
-
-	--weights <file>
-		Initial weights file (use empty file to start from 0)
-
-General options:
-
-	--help
-		Print this message and exit.
-
-	--max-iterations <M>
-		Maximum number of iterations to run.  If not specified, defaults
-		to $default_max_iter.
-
-	--metric <method>
-		Metric to optimize.
-		Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
-
-	--pass-suffix <S>
-		If the decoder is doing multi-pass decoding, the pass suffix "2",
-		"3", etc., is used to control what iteration of weights is set.
-
-	--workdir <dir>
-		Directory for intermediate and output files.  If not specified, the
-		name is derived from the ini filename.  Assuming that the ini
-		filename begins with the decoder name and ends with ini, the default
-		name of the working directory is inferred from the middle part of
-		the filename.  E.g. an ini file named decoder.foo.ini would have
-		a default working directory name foo.
-
-Regularization options:
-
-	--reg <F>
-		l2 regularization strength [default=500]. The greater this value,
-		the closer to zero the weights will be.
-
-	--reg-previous <F>
-		l2 penalty for moving away from the weights from the previous
-		iteration. [default=5000]. The greater this value, the closer
-		to the previous iteration's weights the next iteration's weights
-		will be.
-
-Job control options:
-
-	--jobs <I>
-		Number of decoder processes to run in parallel. [default=$default_jobs]
-
-	--qsub
-		Use qsub to run jobs in parallel (qsub must be configured in
-		environment/LocalEnvironment.pm)
-
-	--pmem <N>
-		Amount of physical memory requested for parallel decoding jobs
-		(used with qsub requests only)
-
-Deprecated options:
-
-	--interpolate-with-weights <F>
-		[deprecated] At each iteration the resulting weights are
-		interpolated with the weights from the previous iteration, with
-		this factor. [default=1.0, i.e., no effect]
-
-Help
-}
-
-sub convert {
-  my ($str) = @_;
-  my @ps = split /;/, $str;
-  my %dict = ();
-  for my $p (@ps) {
-    my ($k, $v) = split /=/, $p;
-    $dict{$k} = $v;
-  }
-  return %dict;
-}
-
-
-sub cmdline {
-    return join ' ',($0,@ORIG_ARGV);
-}
-
-#buggy: last arg gets quoted sometimes?
-my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
-my $shell_escape_in_quote=qr{[\\"\$`!]};
-
-sub escape_shell {
-    my ($arg)=@_;
-    return undef unless defined $arg;
-    if ($arg =~ /$is_shell_special/) {
-        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
-        return "\"$arg\"";
-    }
-    return $arg;
-}
-
-sub escaped_shell_args {
-    return map {local $_=$_;chomp;escape_shell($_)} @_;
-}
-
-sub escaped_shell_args_str {
-    return join ' ',&escaped_shell_args(@_);
-}
-
-sub escaped_cmdline {
-    return "$0 ".&escaped_shell_args_str(@ORIG_ARGV);
-}
diff --git a/pro-train/mr_pro_generate_mapper_input.pl b/pro-train/mr_pro_generate_mapper_input.pl
deleted file mode 100755
index b30fc4fd..00000000
--- a/pro-train/mr_pro_generate_mapper_input.pl
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1;
-my $d = shift @ARGV;
-die "Can't find directory $d" unless -d $d;
-
-opendir(DIR, $d) or die "Can't read $d: $!";
-my @hgs = grep { /\.gz$/ } readdir(DIR);
-closedir DIR;
-
-for my $hg (@hgs) {
-  my $file = $hg;
-  my $id = $hg;
-  $id =~ s/(\.json)?\.gz//;
-  print "$d/$file $id\n";
-}
-
diff --git a/pro-train/mr_pro_map.cc b/pro-train/mr_pro_map.cc
deleted file mode 100644
index eef40b8a..00000000
--- a/pro-train/mr_pro_map.cc
+++ /dev/null
@@ -1,201 +0,0 @@
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <tr1/unordered_map>
-
-#include <boost/functional/hash.hpp>
-#include <boost/shared_ptr.hpp>
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "candidate_set.h"
-#include "sampler.h"
-#include "filelib.h"
-#include "stringlib.h"
-#include "weights.h"
-#include "inside_outside.h"
-#include "hg_io.h"
-#include "ns.h"
-#include "ns_docscorer.h"
-
-// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011)
-
-using namespace std;
-namespace po = boost::program_options;
-
-boost::shared_ptr<MT19937> rng;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
-        ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations")
-        ("kbest_repository,K",po::value<string>()->default_value("./kbest"),"K-best list repository (directory)")
-        ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
-        ("source,s",po::value<string>()->default_value(""), "Source file (ignored, except for AER)")
-        ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
-        ("kbest_size,k",po::value<unsigned>()->default_value(1500u), "Top k-hypotheses to extract")
-        ("candidate_pairs,G", po::value<unsigned>()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)")
-        ("best_pairs,X", po::value<unsigned>()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)")
-        ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  bool flag = false;
-  if (!conf->count("reference")) {
-    cerr << "Please specify one or more references using -r <REF.TXT>\n";
-    flag = true;
-  }
-  if (!conf->count("weights")) {
-    cerr << "Please specify weights using -w <WEIGHTS.TXT>\n";
-    flag = true;
-  }
-  if (flag || conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-struct ThresholdAlpha {
-  explicit ThresholdAlpha(double t = 0.05) : threshold(t) {}
-  double operator()(double mag) const {
-    if (mag < threshold) return 0.0; else return 1.0;
-  }
-  const double threshold;
-};
-
-struct TrainingInstance {
-  TrainingInstance(const SparseVector<weight_t>& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {}
-  SparseVector<weight_t> x;
-#undef DEBUGGING_PRO
-#ifdef DEBUGGING_PRO
-  vector<WordID> a;
-  vector<WordID> b;
-#endif
-  bool y;
-  float gdiff;
-};
-#ifdef DEBUGGING_PRO
-ostream& operator<<(ostream& os, const TrainingInstance& d) {
-  return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x;
-}
-#endif
-
-struct DiffOrder {
-  bool operator()(const TrainingInstance& a, const TrainingInstance& b) const {
-    return a.gdiff > b.gdiff;
-  }
-};
-
-void Sample(const unsigned gamma,
-            const unsigned xi,
-            const training::CandidateSet& J_i,
-            const EvaluationMetric* metric,
-            vector<TrainingInstance>* pv) {
-  const bool invert_score = metric->IsErrorMetric();
-  vector<TrainingInstance> v1, v2;
-  float avg_diff = 0;
-  for (unsigned i = 0; i < gamma; ++i) {
-    const size_t a = rng->inclusive(0, J_i.size() - 1)();
-    const size_t b = rng->inclusive(0, J_i.size() - 1)();
-    if (a == b) continue;
-    float ga = metric->ComputeScore(J_i[a].eval_feats);
-    float gb = metric->ComputeScore(J_i[b].eval_feats);
-    bool positive = gb < ga;
-    if (invert_score) positive = !positive;
-    const float gdiff = fabs(ga - gb);
-    if (!gdiff) continue;
-    avg_diff += gdiff;
-    SparseVector<weight_t> xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros();
-    if (xdiff.empty()) {
-      cerr << "Empty diff:\n  " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl;
-      cerr << "  " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl;
-      continue;
-    }
-    v1.push_back(TrainingInstance(xdiff, positive, gdiff));
-#ifdef DEBUGGING_PRO
-    v1.back().a = J_i[a].hyp;
-    v1.back().b = J_i[b].hyp;
-    cerr << "N: " << v1.back() << endl;
-#endif
-  }
-  avg_diff /= v1.size();
-
-  for (unsigned i = 0; i < v1.size(); ++i) {
-    double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff));
-    // cerr << "avg_diff=" << avg_diff << "  gdiff=" << v1[i].gdiff << "  p=" << p << endl;
-    if (rng->next() < p) v2.push_back(v1[i]);
-  }
-  vector<TrainingInstance>::iterator mid = v2.begin() + xi;
-  if (xi > v2.size()) mid = v2.end();
-  partial_sort(v2.begin(), mid, v2.end(), DiffOrder());
-  copy(v2.begin(), mid, back_inserter(*pv));
-#ifdef DEBUGGING_PRO
-  if (v2.size() >= 5) {
-    for (int i =0; i < (mid - v2.begin()); ++i) {
-      cerr << v2[i] << endl;
-    }
-    cerr << pv->back() << endl;
-  }
-#endif
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  if (conf.count("random_seed"))
-    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
-  else
-    rng.reset(new MT19937);
-  const string evaluation_metric = conf["evaluation_metric"].as<string>();
-
-  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
-  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
-  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
-
-  Hypergraph hg;
-  string last_file;
-  ReadFile in_read(conf["input"].as<string>());
-  istream &in=*in_read.stream();
-  const unsigned kbest_size = conf["kbest_size"].as<unsigned>();
-  const unsigned gamma = conf["candidate_pairs"].as<unsigned>();
-  const unsigned xi = conf["best_pairs"].as<unsigned>();
-  string weightsf = conf["weights"].as<string>();
-  vector<weight_t> weights;
-  Weights::InitFromFile(weightsf, &weights);
-  string kbest_repo = conf["kbest_repository"].as<string>();
-  MkDirP(kbest_repo);
-  while(in) {
-    vector<TrainingInstance> v;
-    string line;
-    getline(in, line);
-    if (line.empty()) continue;
-    istringstream is(line);
-    int sent_id;
-    string file;
-    // path-to-file (JSON) sent_id
-    is >> file >> sent_id;
-    ReadFile rf(file);
-    ostringstream os;
-    training::CandidateSet J_i;
-    os << kbest_repo << "/kbest." << sent_id << ".txt.gz";
-    const string kbest_file = os.str();
-    if (FileExists(kbest_file))
-      J_i.ReadFromFile(kbest_file);
-    HypergraphIO::ReadFromJSON(rf.stream(), &hg);
-    hg.Reweight(weights);
-    J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]);
-    J_i.WriteToFile(kbest_file);
-
-    Sample(gamma, xi, J_i, metric, &v);
-    for (unsigned i = 0; i < v.size(); ++i) {
-      const TrainingInstance& vi = v[i];
-      cout << vi.y << "\t" << vi.x << endl;
-      cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl;
-    }
-  }
-  return 0;
-}
-
diff --git a/pro-train/mr_pro_reduce.cc b/pro-train/mr_pro_reduce.cc
deleted file mode 100644
index 5ef9b470..00000000
--- a/pro-train/mr_pro_reduce.cc
+++ /dev/null
@@ -1,286 +0,0 @@
-#include <cstdlib>
-#include <sstream>
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-#include <boost/program_options.hpp>
-#include <boost/program_options/variables_map.hpp>
-
-#include "filelib.h"
-#include "weights.h"
-#include "sparse_vector.h"
-#include "optimize.h"
-#include "liblbfgs/lbfgs++.h"
-
-using namespace std;
-namespace po = boost::program_options;
-
-// since this is a ranking model, there should be equal numbers of
-// positive and negative examples, so the bias should be 0
-static const double MAX_BIAS = 1e-10;
-
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
-  po::options_description opts("Configuration options");
-  opts.add_options()
-        ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation")
-        ("regularization_strength,C",po::value<double>()->default_value(500.0), "l2 regularization strength")
-        ("l1",po::value<double>()->default_value(0.0), "l1 regularization strength")
-        ("regularize_to_weights,y",po::value<double>()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect")
-        ("memory_buffers,m",po::value<unsigned>()->default_value(100), "Number of memory buffers (LBFGS)")
-        ("min_reg,r",po::value<double>()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght")
-        ("max_reg,R",po::value<double>()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght")
-        ("testset,t",po::value<string>(), "Optional held-out test set")
-        ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength")
-        ("interpolate_with_weights,p",po::value<double>()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect")
-        ("help,h", "Help");
-  po::options_description dcmdline_options;
-  dcmdline_options.add(opts);
-  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
-  if (conf->count("help")) {
-    cerr << dcmdline_options << endl;
-    exit(1);
-  }
-}
-
-void ParseSparseVector(string& line, size_t cur, SparseVector<weight_t>* out) {
-  SparseVector<weight_t>& x = *out;
-  size_t last_start = cur;
-  size_t last_comma = string::npos;
-  while(cur <= line.size()) {
-    if (line[cur] == ' ' || cur == line.size()) {
-      if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
-        cerr << "[ERROR] " << line << endl << "  position = " << cur << endl;
-        exit(1);
-      }
-      const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
-      if (cur < line.size()) line[cur] = 0;
-      const weight_t val = strtod(&line[last_comma + 1], NULL);
-      x.set_value(fid, val);
-
-      last_comma = string::npos;
-      last_start = cur+1;
-    } else {
-      if (line[cur] == '=')
-        last_comma = cur;
-    }
-    ++cur;
-  }
-}
-
-void ReadCorpus(istream* pin, vector<pair<bool, SparseVector<weight_t> > >* corpus) {
-  istream& in = *pin;
-  corpus->clear();
-  bool flag = false;
-  int lc = 0;
-  string line;
-  SparseVector<weight_t> x;
-  while(getline(in, line)) {
-    ++lc;
-    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
-    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
-    if (line.empty()) continue;
-    const size_t ks = line.find("\t");
-    assert(string::npos != ks);
-    assert(ks == 1);
-    const bool y = line[0] == '1';
-    x.clear();
-    ParseSparseVector(line, ks + 1, &x);
-    corpus->push_back(make_pair(y, x));
-  }
-  if (flag) cerr << endl;
-}
-
-void GradAdd(const SparseVector<weight_t>& v, const double scale, weight_t* acc) {
-  for (SparseVector<weight_t>::const_iterator it = v.begin();
-       it != v.end(); ++it) {
-    acc[it->first] += it->second * scale;
-  }
-}
-
-double ApplyRegularizationTerms(const double C,
-                                const double T,
-                                const vector<weight_t>& weights,
-                                const vector<weight_t>& prev_weights,
-                                weight_t* g) {
-  double reg = 0;
-  for (size_t i = 0; i < weights.size(); ++i) {
-    const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0);
-    const double& w_i = weights[i];
-    reg += C * w_i * w_i;
-    g[i] += 2 * C * w_i;
-
-    const double diff_i = w_i - prev_w_i;
-    reg += T * diff_i * diff_i;
-    g[i] += 2 * T * diff_i;
-  }
-  return reg;
-}
-
-double TrainingInference(const vector<weight_t>& x,
-                         const vector<pair<bool, SparseVector<weight_t> > >& corpus,
-                         weight_t* g = NULL) {
-  double cll = 0;
-  for (int i = 0; i < corpus.size(); ++i) {
-    const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias
-    double lp_false = dotprod;
-    double lp_true = -dotprod;
-    if (0 < lp_true) {
-      lp_true += log1p(exp(-lp_true));
-      lp_false = log1p(exp(lp_false));
-    } else {
-      lp_true = log1p(exp(lp_true));
-      lp_false += log1p(exp(-lp_false));
-    }
-    lp_true*=-1;
-    lp_false*=-1;
-    if (corpus[i].first) {  // true label
-      cll -= lp_true;
-      if (g) {
-        // g -= corpus[i].second * exp(lp_false);
-        GradAdd(corpus[i].second, -exp(lp_false), g);
-        g[0] -= exp(lp_false); // bias
-      }
-    } else {                  // false label
-      cll -= lp_false;
-      if (g) {
-        // g += corpus[i].second * exp(lp_true);
-        GradAdd(corpus[i].second, exp(lp_true), g);
-        g[0] += exp(lp_true); // bias
-      }
-    }
-  }
-  return cll;
-}
-
-struct ProLoss {
-  ProLoss(const vector<pair<bool, SparseVector<weight_t> > >& tr,
-          const vector<pair<bool, SparseVector<weight_t> > >& te,
-          const double c,
-          const double t,
-          const vector<weight_t>& px) : training(tr), testing(te), C(c), T(t), prev_x(px){}
-  double operator()(const vector<double>& x, double* g) const {
-    fill(g, g + x.size(), 0.0);
-    double cll = TrainingInference(x, training, g);
-    tppl = 0;
-    if (testing.size())
-      tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size()));
-    double ppl = cll / log(2);
-    ppl /= training.size();
-    ppl = pow(2.0, ppl);
-    double reg = ApplyRegularizationTerms(C, T, x, prev_x, g);
-    return cll + reg;
-  }
-  const vector<pair<bool, SparseVector<weight_t> > >& training, testing;
-  const double C, T;
-  const vector<double>& prev_x;
-  mutable double tppl;
-};
-
-// return held-out log likelihood
-double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& training,
-                       const vector<pair<bool, SparseVector<weight_t> > >& testing,
-                       const double C,
-                       const double C1,
-                       const double T,
-                       const unsigned memory_buffers,
-                       const vector<weight_t>& prev_x,
-                       vector<weight_t>* px) {
-  assert(px->size() == prev_x.size());
-  ProLoss loss(training, testing, C, T, prev_x);
-  LBFGS<ProLoss> lbfgs(px, loss, memory_buffers, C1);
-  lbfgs.MinimizeFunction();
-  return loss.tppl;
-}
-
-int main(int argc, char** argv) {
-  po::variables_map conf;
-  InitCommandLine(argc, argv, &conf);
-  string line;
-  vector<pair<bool, SparseVector<weight_t> > > training, testing;
-  const bool tune_regularizer = conf.count("tune_regularizer");
-  if (tune_regularizer && !conf.count("testset")) {
-    cerr << "--tune_regularizer requires --testset to be set\n";
-    return 1;
-  }
-  const double min_reg = conf["min_reg"].as<double>();
-  const double max_reg = conf["max_reg"].as<double>();
-  double C = conf["regularization_strength"].as<double>(); // will be overridden if parameter is tuned
-  double C1 = conf["l1"].as<double>(); // will be overridden if parameter is tuned
-  const double T = conf["regularize_to_weights"].as<double>();
-  assert(C >= 0.0);
-  assert(min_reg >= 0.0);
-  assert(max_reg >= 0.0);
-  assert(max_reg > min_reg);
-  const double psi = conf["interpolate_with_weights"].as<double>();
-  if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; }
-  ReadCorpus(&cin, &training);
-  if (conf.count("testset")) {
-    ReadFile rf(conf["testset"].as<string>());
-    ReadCorpus(rf.stream(), &testing);
-  }
-  cerr << "Number of features: " << FD::NumFeats() << endl;
-
-  vector<weight_t> x, prev_x;  // x[0] is bias
-  if (conf.count("weights")) {
-    Weights::InitFromFile(conf["weights"].as<string>(), &x);
-    x.resize(FD::NumFeats());
-    prev_x = x;
-  } else {
-    x.resize(FD::NumFeats());
-    prev_x = x;
-  }
-  cerr << "         Number of features: " << x.size() << endl;
-  cerr << "Number of training examples: " << training.size() << endl;
-  cerr << "Number of  testing examples: " << testing.size() << endl;
-  double tppl = 0.0;
-  vector<pair<double,double> > sp;
-  vector<double> smoothed;
-  if (tune_regularizer) {
-    C = min_reg;
-    const double steps = 18;
-    double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps);
-    cerr << "SWEEP FACTOR: " << sweep_factor << endl;
-    while(C < max_reg) {
-      cerr << "C=" << C << "\tT=" <<T << endl;
-      tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x);
-      sp.push_back(make_pair(C, tppl));
-      C *= sweep_factor;
-    }
-    smoothed.resize(sp.size(), 0);
-    smoothed[0] = sp[0].second;
-    smoothed.back() = sp.back().second; 
-    for (int i = 1; i < sp.size()-1; ++i) {
-      double prev = sp[i-1].second;
-      double next = sp[i+1].second;
-      double cur = sp[i].second;
-      smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next);
-    }
-    double best_ppl = 9999999;
-    unsigned best_i = 0;
-    for (unsigned i = 0; i < sp.size(); ++i) {
-      if (smoothed[i] < best_ppl) {
-        best_ppl = smoothed[i];
-        best_i = i;
-      }
-    }
-    C = sp[best_i].first;
-  }  // tune regularizer
-  tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x);
-  if (conf.count("weights")) {
-    for (int i = 1; i < x.size(); ++i) {
-      x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi);
-    }
-  }
-  cout.precision(15);
-  cout << "# C=" << C << "\theld out perplexity=";
-  if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; }
-  if (sp.size()) {
-    cout << "# Parameter sweep:\n";
-    for (int i = 0; i < sp.size(); ++i) {
-      cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl;
-    }
-  }
-  Weights::WriteToFile("-", x);
-  return 0;
-}
diff --git a/pro/Makefile.am b/pro/Makefile.am
new file mode 100644
index 00000000..1e9d46b0
--- /dev/null
+++ b/pro/Makefile.am
@@ -0,0 +1,11 @@
+bin_PROGRAMS = \
+  mr_pro_map \
+  mr_pro_reduce
+
+mr_pro_map_SOURCES = mr_pro_map.cc
+mr_pro_map_LDADD = $(top_srcdir)/training/libtraining.a $(top_srcdir)/decoder/libcdec.a $(top_srcdir)/mteval/libmteval.a $(top_srcdir)/utils/libutils.a -lz
+
+mr_pro_reduce_SOURCES = mr_pro_reduce.cc
+mr_pro_reduce_LDADD = $(top_srcdir)/training/liblbfgs/liblbfgs.a $(top_srcdir)/utils/libutils.a -lz
+
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I$(top_srcdir)/training
diff --git a/pro/README.shared-mem b/pro/README.shared-mem
new file mode 100644
index 00000000..7728efc0
--- /dev/null
+++ b/pro/README.shared-mem
@@ -0,0 +1,9 @@
+If you want to run dist-vest.pl on a very large shared memory machine, do the
+following:
+
+  ./dist-vest.pl --use-make I --decode-nodes J --weights weights.init --source-file=dev.src --ref-files=dev.ref.* cdec.ini
+
+This will use I jobs for doing the line search and J jobs to run the decoder. Typically, since the
+decoder must load grammars, language models, etc., J should be smaller than I, but this will depend
+on the system you are running on and the complexity of the models used for decoding.
+
diff --git a/pro/mr_pro_generate_mapper_input.pl b/pro/mr_pro_generate_mapper_input.pl
new file mode 100755
index 00000000..b30fc4fd
--- /dev/null
+++ b/pro/mr_pro_generate_mapper_input.pl
@@ -0,0 +1,18 @@
+#!/usr/bin/perl -w
+use strict;
+
+die "Usage: $0 HG_DIR\n" unless scalar @ARGV == 1;
+my $d = shift @ARGV;
+die "Can't find directory $d" unless -d $d;
+
+opendir(DIR, $d) or die "Can't read $d: $!";
+my @hgs = grep { /\.gz$/ } readdir(DIR);
+closedir DIR;
+
+for my $hg (@hgs) {
+  my $file = $hg;
+  my $id = $hg;
+  $id =~ s/(\.json)?\.gz//;
+  print "$d/$file $id\n";
+}
+
diff --git a/pro/mr_pro_map.cc b/pro/mr_pro_map.cc
new file mode 100644
index 00000000..eef40b8a
--- /dev/null
+++ b/pro/mr_pro_map.cc
@@ -0,0 +1,201 @@
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <tr1/unordered_map>
+
+#include <boost/functional/hash.hpp>
+#include <boost/shared_ptr.hpp>
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "candidate_set.h"
+#include "sampler.h"
+#include "filelib.h"
+#include "stringlib.h"
+#include "weights.h"
+#include "inside_outside.h"
+#include "hg_io.h"
+#include "ns.h"
+#include "ns_docscorer.h"
+
+// This is Figure 4 (Algorithm Sampler) from Hopkins&May (2011)
+
+using namespace std;
+namespace po = boost::program_options;
+
+boost::shared_ptr<MT19937> rng;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("reference,r",po::value<vector<string> >(), "[REQD] Reference translation (tokenized text)")
+        ("weights,w",po::value<string>(), "[REQD] Weights files from current iterations")
+        ("kbest_repository,K",po::value<string>()->default_value("./kbest"),"K-best list repository (directory)")
+        ("input,i",po::value<string>()->default_value("-"), "Input file to map (- is STDIN)")
+        ("source,s",po::value<string>()->default_value(""), "Source file (ignored, except for AER)")
+        ("evaluation_metric,m",po::value<string>()->default_value("IBM_BLEU"), "Evaluation metric (ibm_bleu, koehn_bleu, nist_bleu, ter, meteor, etc.)")
+        ("kbest_size,k",po::value<unsigned>()->default_value(1500u), "Top k-hypotheses to extract")
+        ("candidate_pairs,G", po::value<unsigned>()->default_value(5000u), "Number of pairs to sample per hypothesis (Gamma)")
+        ("best_pairs,X", po::value<unsigned>()->default_value(50u), "Number of pairs, ranked by magnitude of objective delta, to retain (Xi)")
+        ("random_seed,S", po::value<uint32_t>(), "Random seed (if not specified, /dev/random will be used)")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  bool flag = false;
+  if (!conf->count("reference")) {
+    cerr << "Please specify one or more references using -r <REF.TXT>\n";
+    flag = true;
+  }
+  if (!conf->count("weights")) {
+    cerr << "Please specify weights using -w <WEIGHTS.TXT>\n";
+    flag = true;
+  }
+  if (flag || conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+struct ThresholdAlpha {
+  explicit ThresholdAlpha(double t = 0.05) : threshold(t) {}
+  double operator()(double mag) const {
+    if (mag < threshold) return 0.0; else return 1.0;
+  }
+  const double threshold;
+};
+
+struct TrainingInstance {
+  TrainingInstance(const SparseVector<weight_t>& feats, bool positive, float diff) : x(feats), y(positive), gdiff(diff) {}
+  SparseVector<weight_t> x;
+#undef DEBUGGING_PRO
+#ifdef DEBUGGING_PRO
+  vector<WordID> a;
+  vector<WordID> b;
+#endif
+  bool y;
+  float gdiff;
+};
+#ifdef DEBUGGING_PRO
+ostream& operator<<(ostream& os, const TrainingInstance& d) {
+  return os << d.gdiff << " y=" << d.y << "\tA:" << TD::GetString(d.a) << "\n\tB: " << TD::GetString(d.b) << "\n\tX: " << d.x;
+}
+#endif
+
+struct DiffOrder {
+  bool operator()(const TrainingInstance& a, const TrainingInstance& b) const {
+    return a.gdiff > b.gdiff;
+  }
+};
+
+void Sample(const unsigned gamma,
+            const unsigned xi,
+            const training::CandidateSet& J_i,
+            const EvaluationMetric* metric,
+            vector<TrainingInstance>* pv) {
+  const bool invert_score = metric->IsErrorMetric();
+  vector<TrainingInstance> v1, v2;
+  float avg_diff = 0;
+  for (unsigned i = 0; i < gamma; ++i) {
+    const size_t a = rng->inclusive(0, J_i.size() - 1)();
+    const size_t b = rng->inclusive(0, J_i.size() - 1)();
+    if (a == b) continue;
+    float ga = metric->ComputeScore(J_i[a].eval_feats);
+    float gb = metric->ComputeScore(J_i[b].eval_feats);
+    bool positive = gb < ga;
+    if (invert_score) positive = !positive;
+    const float gdiff = fabs(ga - gb);
+    if (!gdiff) continue;
+    avg_diff += gdiff;
+    SparseVector<weight_t> xdiff = (J_i[a].fmap - J_i[b].fmap).erase_zeros();
+    if (xdiff.empty()) {
+      cerr << "Empty diff:\n  " << TD::GetString(J_i[a].ewords) << endl << "x=" << J_i[a].fmap << endl;
+      cerr << "  " << TD::GetString(J_i[b].ewords) << endl << "x=" << J_i[b].fmap << endl;
+      continue;
+    }
+    v1.push_back(TrainingInstance(xdiff, positive, gdiff));
+#ifdef DEBUGGING_PRO
+    v1.back().a = J_i[a].hyp;
+    v1.back().b = J_i[b].hyp;
+    cerr << "N: " << v1.back() << endl;
+#endif
+  }
+  avg_diff /= v1.size();
+
+  for (unsigned i = 0; i < v1.size(); ++i) {
+    double p = 1.0 / (1.0 + exp(-avg_diff - v1[i].gdiff));
+    // cerr << "avg_diff=" << avg_diff << "  gdiff=" << v1[i].gdiff << "  p=" << p << endl;
+    if (rng->next() < p) v2.push_back(v1[i]);
+  }
+  vector<TrainingInstance>::iterator mid = v2.begin() + xi;
+  if (xi > v2.size()) mid = v2.end();
+  partial_sort(v2.begin(), mid, v2.end(), DiffOrder());
+  copy(v2.begin(), mid, back_inserter(*pv));
+#ifdef DEBUGGING_PRO
+  if (v2.size() >= 5) {
+    for (int i =0; i < (mid - v2.begin()); ++i) {
+      cerr << v2[i] << endl;
+    }
+    cerr << pv->back() << endl;
+  }
+#endif
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  if (conf.count("random_seed"))
+    rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
+  else
+    rng.reset(new MT19937);
+  const string evaluation_metric = conf["evaluation_metric"].as<string>();
+
+  EvaluationMetric* metric = EvaluationMetric::Instance(evaluation_metric);
+  DocumentScorer ds(metric, conf["reference"].as<vector<string> >());
+  cerr << "Loaded " << ds.size() << " references for scoring with " << evaluation_metric << endl;
+
+  Hypergraph hg;
+  string last_file;
+  ReadFile in_read(conf["input"].as<string>());
+  istream &in=*in_read.stream();
+  const unsigned kbest_size = conf["kbest_size"].as<unsigned>();
+  const unsigned gamma = conf["candidate_pairs"].as<unsigned>();
+  const unsigned xi = conf["best_pairs"].as<unsigned>();
+  string weightsf = conf["weights"].as<string>();
+  vector<weight_t> weights;
+  Weights::InitFromFile(weightsf, &weights);
+  string kbest_repo = conf["kbest_repository"].as<string>();
+  MkDirP(kbest_repo);
+  while(in) {
+    vector<TrainingInstance> v;
+    string line;
+    getline(in, line);
+    if (line.empty()) continue;
+    istringstream is(line);
+    int sent_id;
+    string file;
+    // path-to-file (JSON) sent_id
+    is >> file >> sent_id;
+    ReadFile rf(file);
+    ostringstream os;
+    training::CandidateSet J_i;
+    os << kbest_repo << "/kbest." << sent_id << ".txt.gz";
+    const string kbest_file = os.str();
+    if (FileExists(kbest_file))
+      J_i.ReadFromFile(kbest_file);
+    HypergraphIO::ReadFromJSON(rf.stream(), &hg);
+    hg.Reweight(weights);
+    J_i.AddKBestCandidates(hg, kbest_size, ds[sent_id]);
+    J_i.WriteToFile(kbest_file);
+
+    Sample(gamma, xi, J_i, metric, &v);
+    for (unsigned i = 0; i < v.size(); ++i) {
+      const TrainingInstance& vi = v[i];
+      cout << vi.y << "\t" << vi.x << endl;
+      cout << (!vi.y) << "\t" << (vi.x * -1.0) << endl;
+    }
+  }
+  return 0;
+}
+
diff --git a/pro/mr_pro_reduce.cc b/pro/mr_pro_reduce.cc
new file mode 100644
index 00000000..5ef9b470
--- /dev/null
+++ b/pro/mr_pro_reduce.cc
@@ -0,0 +1,286 @@
+#include <cstdlib>
+#include <sstream>
+#include <iostream>
+#include <fstream>
+#include <vector>
+
+#include <boost/program_options.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "filelib.h"
+#include "weights.h"
+#include "sparse_vector.h"
+#include "optimize.h"
+#include "liblbfgs/lbfgs++.h"
+
+using namespace std;
+namespace po = boost::program_options;
+
+// since this is a ranking model, there should be equal numbers of
+// positive and negative examples, so the bias should be 0
+static const double MAX_BIAS = 1e-10;
+
+void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+  po::options_description opts("Configuration options");
+  opts.add_options()
+        ("weights,w", po::value<string>(), "Weights from previous iteration (used as initialization and interpolation")
+        ("regularization_strength,C",po::value<double>()->default_value(500.0), "l2 regularization strength")
+        ("l1",po::value<double>()->default_value(0.0), "l1 regularization strength")
+        ("regularize_to_weights,y",po::value<double>()->default_value(5000.0), "Differences in learned weights to previous weights are penalized with an l2 penalty with this strength; 0.0 = no effect")
+        ("memory_buffers,m",po::value<unsigned>()->default_value(100), "Number of memory buffers (LBFGS)")
+        ("min_reg,r",po::value<double>()->default_value(0.01), "When tuning (-T) regularization strength, minimum regularization strenght")
+        ("max_reg,R",po::value<double>()->default_value(1e6), "When tuning (-T) regularization strength, maximum regularization strenght")
+        ("testset,t",po::value<string>(), "Optional held-out test set")
+        ("tune_regularizer,T", "Use the held out test set (-t) to tune the regularization strength")
+        ("interpolate_with_weights,p",po::value<double>()->default_value(1.0), "[deprecated] Output weights are p*w + (1-p)*w_prev; 1.0 = no effect")
+        ("help,h", "Help");
+  po::options_description dcmdline_options;
+  dcmdline_options.add(opts);
+  po::store(parse_command_line(argc, argv, dcmdline_options), *conf);
+  if (conf->count("help")) {
+    cerr << dcmdline_options << endl;
+    exit(1);
+  }
+}
+
+void ParseSparseVector(string& line, size_t cur, SparseVector<weight_t>* out) {
+  SparseVector<weight_t>& x = *out;
+  size_t last_start = cur;
+  size_t last_comma = string::npos;
+  while(cur <= line.size()) {
+    if (line[cur] == ' ' || cur == line.size()) {
+      if (!(cur > last_start && last_comma != string::npos && cur > last_comma)) {
+        cerr << "[ERROR] " << line << endl << "  position = " << cur << endl;
+        exit(1);
+      }
+      const int fid = FD::Convert(line.substr(last_start, last_comma - last_start));
+      if (cur < line.size()) line[cur] = 0;
+      const weight_t val = strtod(&line[last_comma + 1], NULL);
+      x.set_value(fid, val);
+
+      last_comma = string::npos;
+      last_start = cur+1;
+    } else {
+      if (line[cur] == '=')
+        last_comma = cur;
+    }
+    ++cur;
+  }
+}
+
+void ReadCorpus(istream* pin, vector<pair<bool, SparseVector<weight_t> > >* corpus) {
+  istream& in = *pin;
+  corpus->clear();
+  bool flag = false;
+  int lc = 0;
+  string line;
+  SparseVector<weight_t> x;
+  while(getline(in, line)) {
+    ++lc;
+    if (lc % 1000 == 0) { cerr << '.'; flag = true; }
+    if (lc % 40000 == 0) { cerr << " [" << lc << "]\n"; flag = false; }
+    if (line.empty()) continue;
+    const size_t ks = line.find("\t");
+    assert(string::npos != ks);
+    assert(ks == 1);
+    const bool y = line[0] == '1';
+    x.clear();
+    ParseSparseVector(line, ks + 1, &x);
+    corpus->push_back(make_pair(y, x));
+  }
+  if (flag) cerr << endl;
+}
+
+void GradAdd(const SparseVector<weight_t>& v, const double scale, weight_t* acc) {
+  for (SparseVector<weight_t>::const_iterator it = v.begin();
+       it != v.end(); ++it) {
+    acc[it->first] += it->second * scale;
+  }
+}
+
+double ApplyRegularizationTerms(const double C,
+                                const double T,
+                                const vector<weight_t>& weights,
+                                const vector<weight_t>& prev_weights,
+                                weight_t* g) {
+  double reg = 0;
+  for (size_t i = 0; i < weights.size(); ++i) {
+    const double prev_w_i = (i < prev_weights.size() ? prev_weights[i] : 0.0);
+    const double& w_i = weights[i];
+    reg += C * w_i * w_i;
+    g[i] += 2 * C * w_i;
+
+    const double diff_i = w_i - prev_w_i;
+    reg += T * diff_i * diff_i;
+    g[i] += 2 * T * diff_i;
+  }
+  return reg;
+}
+
+double TrainingInference(const vector<weight_t>& x,
+                         const vector<pair<bool, SparseVector<weight_t> > >& corpus,
+                         weight_t* g = NULL) {
+  double cll = 0;
+  for (int i = 0; i < corpus.size(); ++i) {
+    const double dotprod = corpus[i].second.dot(x) + (x.size() ? x[0] : weight_t()); // x[0] is bias
+    double lp_false = dotprod;
+    double lp_true = -dotprod;
+    if (0 < lp_true) {
+      lp_true += log1p(exp(-lp_true));
+      lp_false = log1p(exp(lp_false));
+    } else {
+      lp_true = log1p(exp(lp_true));
+      lp_false += log1p(exp(-lp_false));
+    }
+    lp_true*=-1;
+    lp_false*=-1;
+    if (corpus[i].first) {  // true label
+      cll -= lp_true;
+      if (g) {
+        // g -= corpus[i].second * exp(lp_false);
+        GradAdd(corpus[i].second, -exp(lp_false), g);
+        g[0] -= exp(lp_false); // bias
+      }
+    } else {                  // false label
+      cll -= lp_false;
+      if (g) {
+        // g += corpus[i].second * exp(lp_true);
+        GradAdd(corpus[i].second, exp(lp_true), g);
+        g[0] += exp(lp_true); // bias
+      }
+    }
+  }
+  return cll;
+}
+
+struct ProLoss {
+  ProLoss(const vector<pair<bool, SparseVector<weight_t> > >& tr,
+          const vector<pair<bool, SparseVector<weight_t> > >& te,
+          const double c,
+          const double t,
+          const vector<weight_t>& px) : training(tr), testing(te), C(c), T(t), prev_x(px){}
+  double operator()(const vector<double>& x, double* g) const {
+    fill(g, g + x.size(), 0.0);
+    double cll = TrainingInference(x, training, g);
+    tppl = 0;
+    if (testing.size())
+      tppl = pow(2.0, TrainingInference(x, testing, g) / (log(2) * testing.size()));
+    double ppl = cll / log(2);
+    ppl /= training.size();
+    ppl = pow(2.0, ppl);
+    double reg = ApplyRegularizationTerms(C, T, x, prev_x, g);
+    return cll + reg;
+  }
+  const vector<pair<bool, SparseVector<weight_t> > >& training, testing;
+  const double C, T;
+  const vector<double>& prev_x;
+  mutable double tppl;
+};
+
+// return held-out log likelihood
+double LearnParameters(const vector<pair<bool, SparseVector<weight_t> > >& training,
+                       const vector<pair<bool, SparseVector<weight_t> > >& testing,
+                       const double C,
+                       const double C1,
+                       const double T,
+                       const unsigned memory_buffers,
+                       const vector<weight_t>& prev_x,
+                       vector<weight_t>* px) {
+  assert(px->size() == prev_x.size());
+  ProLoss loss(training, testing, C, T, prev_x);
+  LBFGS<ProLoss> lbfgs(px, loss, memory_buffers, C1);
+  lbfgs.MinimizeFunction();
+  return loss.tppl;
+}
+
+int main(int argc, char** argv) {
+  po::variables_map conf;
+  InitCommandLine(argc, argv, &conf);
+  string line;
+  vector<pair<bool, SparseVector<weight_t> > > training, testing;
+  const bool tune_regularizer = conf.count("tune_regularizer");
+  if (tune_regularizer && !conf.count("testset")) {
+    cerr << "--tune_regularizer requires --testset to be set\n";
+    return 1;
+  }
+  const double min_reg = conf["min_reg"].as<double>();
+  const double max_reg = conf["max_reg"].as<double>();
+  double C = conf["regularization_strength"].as<double>(); // will be overridden if parameter is tuned
+  double C1 = conf["l1"].as<double>(); // will be overridden if parameter is tuned
+  const double T = conf["regularize_to_weights"].as<double>();
+  assert(C >= 0.0);
+  assert(min_reg >= 0.0);
+  assert(max_reg >= 0.0);
+  assert(max_reg > min_reg);
+  const double psi = conf["interpolate_with_weights"].as<double>();
+  if (psi < 0.0 || psi > 1.0) { cerr << "Invalid interpolation weight: " << psi << endl; return 1; }
+  ReadCorpus(&cin, &training);
+  if (conf.count("testset")) {
+    ReadFile rf(conf["testset"].as<string>());
+    ReadCorpus(rf.stream(), &testing);
+  }
+  cerr << "Number of features: " << FD::NumFeats() << endl;
+
+  vector<weight_t> x, prev_x;  // x[0] is bias
+  if (conf.count("weights")) {
+    Weights::InitFromFile(conf["weights"].as<string>(), &x);
+    x.resize(FD::NumFeats());
+    prev_x = x;
+  } else {
+    x.resize(FD::NumFeats());
+    prev_x = x;
+  }
+  cerr << "         Number of features: " << x.size() << endl;
+  cerr << "Number of training examples: " << training.size() << endl;
+  cerr << "Number of  testing examples: " << testing.size() << endl;
+  double tppl = 0.0;
+  vector<pair<double,double> > sp;
+  vector<double> smoothed;
+  if (tune_regularizer) {
+    C = min_reg;
+    const double steps = 18;
+    double sweep_factor = exp((log(max_reg) - log(min_reg)) / steps);
+    cerr << "SWEEP FACTOR: " << sweep_factor << endl;
+    while(C < max_reg) {
+      cerr << "C=" << C << "\tT=" <<T << endl;
+      tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x);
+      sp.push_back(make_pair(C, tppl));
+      C *= sweep_factor;
+    }
+    smoothed.resize(sp.size(), 0);
+    smoothed[0] = sp[0].second;
+    smoothed.back() = sp.back().second; 
+    for (int i = 1; i < sp.size()-1; ++i) {
+      double prev = sp[i-1].second;
+      double next = sp[i+1].second;
+      double cur = sp[i].second;
+      smoothed[i] = (prev*0.2) + cur * 0.6 + (0.2*next);
+    }
+    double best_ppl = 9999999;
+    unsigned best_i = 0;
+    for (unsigned i = 0; i < sp.size(); ++i) {
+      if (smoothed[i] < best_ppl) {
+        best_ppl = smoothed[i];
+        best_i = i;
+      }
+    }
+    C = sp[best_i].first;
+  }  // tune regularizer
+  tppl = LearnParameters(training, testing, C, C1, T, conf["memory_buffers"].as<unsigned>(), prev_x, &x);
+  if (conf.count("weights")) {
+    for (int i = 1; i < x.size(); ++i) {
+      x[i] = (x[i] * psi) + prev_x[i] * (1.0 - psi);
+    }
+  }
+  cout.precision(15);
+  cout << "# C=" << C << "\theld out perplexity=";
+  if (tppl) { cout << tppl << endl; } else { cout << "N/A\n"; }
+  if (sp.size()) {
+    cout << "# Parameter sweep:\n";
+    for (int i = 0; i < sp.size(); ++i) {
+      cout << "# " << sp[i].first << "\t" << sp[i].second << "\t" << smoothed[i] << endl;
+    }
+  }
+  Weights::WriteToFile("-", x);
+  return 0;
+}
diff --git a/pro/pro.pl b/pro/pro.pl
new file mode 100755
index 00000000..891b7e4c
--- /dev/null
+++ b/pro/pro.pl
@@ -0,0 +1,555 @@
+#!/usr/bin/env perl
+use strict;
+use File::Basename qw(basename);
+my @ORIG_ARGV=@ARGV;
+use Cwd qw(getcwd);
+my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; }
+
+# Skip local config (used for distributing jobs) if we're running in local-only mode
+use LocalConfig;
+use Getopt::Long;
+use IPC::Open2;
+use POSIX ":sys_wait_h";
+my $QSUB_CMD = qsub_args(mert_memory());
+my $default_jobs = env_default_jobs();
+
+my $VEST_DIR="$SCRIPT_DIR/../dpmert";
+require "$VEST_DIR/libcall.pl";
+
+# Default settings
+my $srcFile;
+my $refFiles;
+my $bin_dir = $SCRIPT_DIR;
+die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
+my $FAST_SCORE="$bin_dir/../mteval/fast_score";
+die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
+my $MAPINPUT = "$bin_dir/mr_pro_generate_mapper_input.pl";
+my $MAPPER = "$bin_dir/mr_pro_map";
+my $REDUCER = "$bin_dir/mr_pro_reduce";
+my $parallelize = "$VEST_DIR/parallelize.pl";
+my $libcall = "$VEST_DIR/libcall.pl";
+my $sentserver = "$VEST_DIR/sentserver";
+my $sentclient = "$VEST_DIR/sentclient";
+my $LocalConfig = "$SCRIPT_DIR/../environment/LocalConfig.pm";
+
+my $SCORER = $FAST_SCORE;
+die "Can't find $MAPPER" unless -x $MAPPER;
+my $cdec = "$bin_dir/../decoder/cdec";
+die "Can't find decoder in $cdec" unless -x $cdec;
+die "Can't find $parallelize" unless -x $parallelize;
+die "Can't find $libcall" unless -e $libcall;
+my $decoder = $cdec;
+my $lines_per_mapper = 30;
+my $iteration = 1;
+my $best_weights;
+my $psi = 1;
+my $default_max_iter = 30;
+my $max_iterations = $default_max_iter;
+my $jobs = $default_jobs;   # number of decode nodes
+my $pmem = "4g";
+my $disable_clean = 0;
+my %seen_weights;
+my $help = 0;
+my $epsilon = 0.0001;
+my $dryrun = 0;
+my $last_score = -10000000;
+my $metric = "ibm_bleu";
+my $dir;
+my $iniFile;
+my $weights;
+my $use_make = 1;  # use make to parallelize
+my $useqsub = 0;
+my $initial_weights;
+my $pass_suffix = '';
+my $devset;
+
+# regularization strength
+my $reg = 500;
+my $reg_previous = 5000;
+
+# Process command-line options
+if (GetOptions(
+	"config=s" => \$iniFile,
+	"weights=s" => \$initial_weights,
+        "devset=s" => \$devset,
+	"jobs=i" => \$jobs,
+	"metric=s" => \$metric,
+	"pass-suffix=s" => \$pass_suffix,
+        "qsub" => \$useqsub,
+	"help" => \$help,
+	"reg=f" => \$reg,
+	"reg-previous=f" => \$reg_previous,
+	"output-dir=s" => \$dir,
+) == 0 || @ARGV!=0 || $help) {
+	print_help();
+	exit;
+}
+
+if ($useqsub) {
+  $use_make = 0;
+  die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
+}
+
+my @missing_args = ();
+if (!defined $iniFile) { push @missing_args, "--config"; }
+if (!defined $devset) { push @missing_args, "--devset"; }
+if (!defined $initial_weights) { push @missing_args, "--weights"; }
+die "Please specify missing arguments: " . join (', ', @missing_args) . "\n" if (@missing_args);
+
+if ($metric =~ /^(combi|ter)$/i) {
+  $lines_per_mapper = 5;
+}
+
+my $host =check_output("hostname"); chomp $host;
+my $bleu;
+my $interval_count = 0;
+my $logfile;
+my $projected_score;
+
+# used in sorting scores
+my $DIR_FLAG = '-r';
+if ($metric =~ /^ter$|^aer$/i) {
+  $DIR_FLAG = '';
+}
+
+unless ($dir){
+	$dir = 'pro';
+}
+unless ($dir =~ /^\//){  # convert relative path to absolute path
+	my $basedir = check_output("pwd");
+	chomp $basedir;
+	$dir = "$basedir/$dir";
+}
+
+# Initializations and helper functions
+srand;
+
+my @childpids = ();
+my @cleanupcmds = ();
+
+sub cleanup {
+	print STDERR "Cleanup...\n";
+	for my $pid (@childpids){ unchecked_call("kill $pid"); }
+	for my $cmd (@cleanupcmds){ unchecked_call("$cmd"); }
+	exit 1;
+};
+# Always call cleanup, no matter how we exit
+*CORE::GLOBAL::exit = 
+    sub{ cleanup(); }; 
+$SIG{INT} = "cleanup";
+$SIG{TERM} = "cleanup";
+$SIG{HUP} = "cleanup";
+
+my $decoderBase = check_output("basename $decoder"); chomp $decoderBase;
+my $newIniFile = "$dir/$decoderBase.ini";
+my $inputFileName = "$dir/input";
+my $user = $ENV{"USER"};
+
+
+# process ini file
+-e $iniFile || die "Error: could not open $iniFile for reading\n";
+open(INI, $iniFile);
+
+if (-e $dir) {
+	die "ERROR: working dir $dir already exists\n\n";
+} else {
+	mkdir "$dir" or die "Can't mkdir $dir: $!";
+	mkdir "$dir/hgs" or die;
+	mkdir "$dir/scripts" or die;
+	print STDERR <<EOT;
+	DECODER:          $decoder
+	INI FILE:         $iniFile
+	WORKING DIR:      $dir
+	DEVSET:           $devset
+	EVAL METRIC:      $metric
+	MAX ITERATIONS:   $max_iterations
+	PARALLEL JOBS:    $jobs
+	HEAD NODE:        $host
+	PMEM (DECODING):  $pmem
+	INITIAL WEIGHTS:  $initial_weights
+EOT
+}
+
+# Generate initial files and values
+check_call("cp $iniFile $newIniFile");
+check_call("cp $initial_weights $dir/weights.0");
+$iniFile = $newIniFile;
+
+my $refs = "$dir/dev.refs";
+split_devset($devset, "$dir/dev.input.raw", $refs);
+my $newsrc = "$dir/dev.input";
+enseg("$dir/dev.input.raw", $newsrc);
+$srcFile = $newsrc;
+my $devSize = 0;
+open F, "<$srcFile" or die "Can't read $srcFile: $!";
+while(<F>) { $devSize++; }
+close F;
+
+unless($best_weights){ $best_weights = $weights; }
+unless($projected_score){ $projected_score = 0.0; }
+$seen_weights{$weights} = 1;
+
+my $random_seed = int(time / 1000);
+my $lastWeightsFile;
+my $lastPScore = 0;
+# main optimization loop
+my @allweights;
+while (1){
+	print STDERR "\n\nITERATION $iteration\n==========\n";
+
+	if ($iteration > $max_iterations){
+		print STDERR "\nREACHED STOPPING CRITERION: Maximum iterations\n";
+		last;
+	}
+	# iteration-specific files
+	my $runFile="$dir/run.raw.$iteration";
+	my $onebestFile="$dir/1best.$iteration";
+	my $logdir="$dir/logs.$iteration";
+	my $decoderLog="$logdir/decoder.sentserver.log.$iteration";
+	my $scorerLog="$logdir/scorer.log.$iteration";
+	check_call("mkdir -p $logdir");
+
+
+	#decode
+	print STDERR "RUNNING DECODER AT ";
+	print STDERR unchecked_output("date");
+	my $im1 = $iteration - 1;
+	my $weightsFile="$dir/weights.$im1";
+        push @allweights, "-w $dir/weights.$im1";
+        `rm -f $dir/hgs/*.gz`;
+	my $decoder_cmd = "$decoder -c $iniFile --weights$pass_suffix $weightsFile -O $dir/hgs";
+	my $pcmd;
+	if ($use_make) {
+		$pcmd = "cat $srcFile | $parallelize --use-fork -p $pmem -e $logdir -j $jobs --";
+	} else {
+		$pcmd = "cat $srcFile | $parallelize -p $pmem -e $logdir -j $jobs --";
+	}
+	my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $runFile";
+	print STDERR "COMMAND:\n$cmd\n";
+	check_bash_call($cmd);
+        my $num_hgs;
+        my $num_topbest;
+        my $retries = 0;
+	while($retries < 5) {
+	    $num_hgs = check_output("ls $dir/hgs/*.gz | wc -l");
+	    $num_topbest = check_output("wc -l < $runFile");
+	    print STDERR "NUMBER OF HGs: $num_hgs\n";
+	    print STDERR "NUMBER OF TOP-BEST HYPs: $num_topbest\n";
+	    if($devSize == $num_hgs && $devSize == $num_topbest) {
+		last;
+	    } else {
+		print STDERR "Incorrect number of hypergraphs or topbest. Waiting for distributed filesystem and retrying...\n";
+		sleep(3);
+	    }
+	    $retries++;
+	}
+	die "Dev set contains $devSize sentences, but we don't have topbest and hypergraphs for all these! Decoder failure? Check $decoderLog\n" if ($devSize != $num_hgs || $devSize != $num_topbest);
+	my $dec_score = check_output("cat $runFile | $SCORER -r $refs -m $metric");
+	chomp $dec_score;
+	print STDERR "DECODER SCORE: $dec_score\n";
+
+	# save space
+	check_call("gzip -f $runFile");
+	check_call("gzip -f $decoderLog");
+
+	# run optimizer
+	print STDERR "RUNNING OPTIMIZER AT ";
+	print STDERR unchecked_output("date");
+	print STDERR " - GENERATE TRAINING EXEMPLARS\n";
+	my $mergeLog="$logdir/prune-merge.log.$iteration";
+
+	my $score = 0;
+	my $icc = 0;
+	my $inweights="$dir/weights.$im1";
+	$cmd="$MAPINPUT $dir/hgs > $dir/agenda.$im1";
+	print STDERR "COMMAND:\n$cmd\n";
+	check_call($cmd);
+	check_call("mkdir -p $dir/splag.$im1");
+	$cmd="split -a 3 -l $lines_per_mapper $dir/agenda.$im1 $dir/splag.$im1/mapinput.";
+	print STDERR "COMMAND:\n$cmd\n";
+	check_call($cmd);
+	opendir(DIR, "$dir/splag.$im1") or die "Can't open directory: $!";
+	my @shards = grep { /^mapinput\./ } readdir(DIR);
+	closedir DIR;
+	die "No shards!" unless scalar @shards > 0;
+	my $joblist = "";
+	my $nmappers = 0;
+	@cleanupcmds = ();
+	my %o2i = ();
+	my $first_shard = 1;
+	my $mkfile; # only used with makefiles
+	my $mkfilename;
+	if ($use_make) {
+		$mkfilename = "$dir/splag.$im1/domap.mk";
+		open $mkfile, ">$mkfilename" or die "Couldn't write $mkfilename: $!";
+		print $mkfile "all: $dir/splag.$im1/map.done\n\n";
+	}
+	my @mkouts = ();  # only used with makefiles
+	my @mapoutputs = ();
+	for my $shard (@shards) {
+		my $mapoutput = $shard;
+		my $client_name = $shard;
+		$client_name =~ s/mapinput.//;
+		$client_name = "pro.$client_name";
+		$mapoutput =~ s/mapinput/mapoutput/;
+		push @mapoutputs, "$dir/splag.$im1/$mapoutput";
+		$o2i{"$dir/splag.$im1/$mapoutput"} = "$dir/splag.$im1/$shard";
+		my $script = "$MAPPER -s $srcFile -m $metric -r $refs -w $inweights -K $dir/kbest < $dir/splag.$im1/$shard > $dir/splag.$im1/$mapoutput";
+		if ($use_make) {
+			my $script_file = "$dir/scripts/map.$shard";
+			open F, ">$script_file" or die "Can't write $script_file: $!";
+			print F "#!/bin/bash\n";
+			print F "$script\n";
+			close F;
+			my $output = "$dir/splag.$im1/$mapoutput";
+			push @mkouts, $output;
+			chmod(0755, $script_file) or die "Can't chmod $script_file: $!";
+			if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
+			print $mkfile "$output: $dir/splag.$im1/$shard\n\t$script_file\n\n";
+		} else {
+			my $script_file = "$dir/scripts/map.$shard";
+			open F, ">$script_file" or die "Can't write $script_file: $!";
+			print F "$script\n";
+			close F;
+			if ($first_shard) { print STDERR "$script\n"; $first_shard=0; }
+
+			$nmappers++;
+			my $qcmd = "$QSUB_CMD -N $client_name -o /dev/null -e $logdir/$client_name.ER $script_file";
+			my $jobid = check_output("$qcmd");
+			chomp $jobid;
+			$jobid =~ s/^(\d+)(.*?)$/\1/g;
+			$jobid =~ s/^Your job (\d+) .*$/\1/;
+		 	push(@cleanupcmds, "qdel $jobid 2> /dev/null");
+			print STDERR " $jobid";
+			if ($joblist == "") { $joblist = $jobid; }
+			else {$joblist = $joblist . "\|" . $jobid; }
+		}
+	}
+	my @dev_outs = ();
+	my @devtest_outs = ();
+	@dev_outs = @mapoutputs;
+	if ($use_make) {
+		print $mkfile "$dir/splag.$im1/map.done: @mkouts\n\ttouch $dir/splag.$im1/map.done\n\n";
+		close $mkfile;
+		my $mcmd = "make -j $jobs -f $mkfilename";
+		print STDERR "\nExecuting: $mcmd\n";
+		check_call($mcmd);
+	} else {
+		print STDERR "\nLaunched $nmappers mappers.\n";
+      		sleep 8;
+		print STDERR "Waiting for mappers to complete...\n";
+		while ($nmappers > 0) {
+		  sleep 5;
+		  my @livejobs = grep(/$joblist/, split(/\n/, unchecked_output("qstat | grep -v ' C '")));
+		  $nmappers = scalar @livejobs;
+		}
+		print STDERR "All mappers complete.\n";
+	}
+	my $tol = 0;
+	my $til = 0;
+	my $dev_test_file = "$dir/splag.$im1/devtest.gz";
+	print STDERR "\nRUNNING CLASSIFIER (REDUCER)\n";
+	print STDERR unchecked_output("date");
+	$cmd="cat @dev_outs | $REDUCER -w $dir/weights.$im1 -C $reg -y $reg_previous --interpolate_with_weights $psi";
+        $cmd .= " > $dir/weights.$iteration";
+	print STDERR "COMMAND:\n$cmd\n";
+	check_bash_call($cmd);
+	$lastWeightsFile = "$dir/weights.$iteration";
+	$lastPScore = $score;
+	$iteration++;
+	print STDERR "\n==========\n";
+}
+
+
+check_call("cp $lastWeightsFile $dir/weights.final");
+print STDERR "\nFINAL WEIGHTS: $dir/weights.final\n(Use -w <this file> with the decoder)\n\n";
+print STDOUT "$dir/weights.final\n";
+
+exit 0;
+
+sub read_weights_file {
+  my ($file) = @_;
+  open F, "<$file" or die "Couldn't read $file: $!";
+  my @r = ();
+  my $pm = -1;
+  while(<F>) {
+    next if /^#/;
+    next if /^\s*$/;
+    chomp;
+    if (/^(.+)\s+(.+)$/) {
+      my $m = $1;
+      my $w = $2;
+      die "Weights out of order: $m <= $pm" unless $m > $pm;
+      push @r, $w;
+    } else {
+      warn "Unexpected feature name in weight file: $_";
+    }
+  }
+  close F;
+  return join ' ', @r;
+}
+
+sub enseg {
+	my $src = shift;
+	my $newsrc = shift;
+	open(SRC, $src);
+	open(NEWSRC, ">$newsrc");
+	my $i=0;
+	while (my $line=<SRC>){
+		chomp $line;
+		if ($line =~ /^\s*<seg/i) {
+		    if($line =~ /id="[0-9]+"/) {
+			print NEWSRC "$line\n";
+		    } else {
+			die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
+		    }
+		} else {
+			print NEWSRC "<seg id=\"$i\">$line</seg>\n";
+		}
+		$i++;
+	}
+	close SRC;
+	close NEWSRC;
+	die "Empty dev set!" if ($i == 0);
+}
+
+sub print_help {
+
+	my $executable = basename($0); chomp $executable;
+	print << "Help";
+
+Usage: $executable [options]
+
+	$executable [options]
+		Runs a complete PRO optimization using the ini file specified.
+
+Required:
+
+	--config <cdec.ini>
+		Decoder configuration file.
+
+	--devset <files>
+		Dev set source and reference data.
+
+	--weights <file>
+		Initial weights file (use empty file to start from 0)
+
+General options:
+
+	--help
+		Print this message and exit.
+
+	--max-iterations <M>
+		Maximum number of iterations to run.  If not specified, defaults
+		to $default_max_iter.
+
+	--metric <method>
+		Metric to optimize.
+		Example values: IBM_BLEU, NIST_BLEU, Koehn_BLEU, TER, Combi
+
+	--pass-suffix <S>
+		If the decoder is doing multi-pass decoding, the pass suffix "2",
+		"3", etc., is used to control what iteration of weights is set.
+
+	--workdir <dir>
+		Directory for intermediate and output files.  If not specified, the
+		name is derived from the ini filename.  Assuming that the ini
+		filename begins with the decoder name and ends with ini, the default
+		name of the working directory is inferred from the middle part of
+		the filename.  E.g. an ini file named decoder.foo.ini would have
+		a default working directory name foo.
+
+Regularization options:
+
+	--reg <F>
+		l2 regularization strength [default=500]. The greater this value,
+		the closer to zero the weights will be.
+
+	--reg-previous <F>
+		l2 penalty for moving away from the weights from the previous
+		iteration. [default=5000]. The greater this value, the closer
+		to the previous iteration's weights the next iteration's weights
+		will be.
+
+Job control options:
+
+	--jobs <I>
+		Number of decoder processes to run in parallel. [default=$default_jobs]
+
+	--qsub
+		Use qsub to run jobs in parallel (qsub must be configured in
+		environment/LocalEnvironment.pm)
+
+	--pmem <N>
+		Amount of physical memory requested for parallel decoding jobs
+		(used with qsub requests only)
+
+Deprecated options:
+
+	--interpolate-with-weights <F>
+		[deprecated] At each iteration the resulting weights are
+		interpolated with the weights from the previous iteration, with
+		this factor. [default=1.0, i.e., no effect]
+
+Help
+}
+
+sub convert {
+  my ($str) = @_;
+  my @ps = split /;/, $str;
+  my %dict = ();
+  for my $p (@ps) {
+    my ($k, $v) = split /=/, $p;
+    $dict{$k} = $v;
+  }
+  return %dict;
+}
+
+
+sub cmdline {
+    return join ' ',($0,@ORIG_ARGV);
+}
+
+#buggy: last arg gets quoted sometimes?
+my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
+my $shell_escape_in_quote=qr{[\\"\$`!]};
+
+sub escape_shell {
+    my ($arg)=@_;
+    return undef unless defined $arg;
+    if ($arg =~ /$is_shell_special/) {
+        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
+        return "\"$arg\"";
+    }
+    return $arg;
+}
+
+sub escaped_shell_args {
+    return map {local $_=$_;chomp;escape_shell($_)} @_;
+}
+
+sub escaped_shell_args_str {
+    return join ' ',&escaped_shell_args(@_);
+}
+
+sub escaped_cmdline {
+    return "$0 ".&escaped_shell_args_str(@ORIG_ARGV);
+}
+
+sub split_devset {
+  my ($infile, $outsrc, $outref) = @_;
+  open F, "<$infile" or die "Can't read $infile: $!";
+  open S, ">$outsrc" or die "Can't write $outsrc: $!";
+  open R, ">$outref" or die "Can't write $outref: $!";
+  while(<F>) {
+    chomp;
+    my ($src, @refs) = split /\s*\|\|\|\s*/;
+    die "Malformed devset line: $_\n" unless scalar @refs > 0;
+    print S "$src\n";
+    print R join(' ||| ', @refs) . "\n";
+  }
+  close R;
+  close S;
+  close F;
+}
+
-- 
cgit v1.2.3