#!/usr/bin/env perl
use strict;
my @ORIG_ARGV=@ARGV;
use Cwd qw(getcwd);
my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../../environment"; }

# Skip local config (used for distributing jobs) if we're running in local-only mode
use LocalConfig;
use Getopt::Long;
use File::Basename qw(basename);
my $QSUB_CMD = qsub_args(mert_memory());

require "libcall.pl";

# Default settings
my $default_jobs = env_default_jobs();
my $bin_dir = $SCRIPT_DIR;
die "Bin directory $bin_dir missing/inaccessible" unless -d $bin_dir;
my $FAST_SCORE="$bin_dir/../../mteval/fast_score";
die "Can't execute $FAST_SCORE" unless -x $FAST_SCORE;
my $parallelize = "$bin_dir/parallelize.pl";
my $libcall = "$bin_dir/libcall.pl";
my $sentserver = "$bin_dir/sentserver";
my $sentclient = "$bin_dir/sentclient";
my $LocalConfig = "$SCRIPT_DIR/../../environment/LocalConfig.pm";

my $SCORER = $FAST_SCORE;
my $cdec = "$bin_dir/../../decoder/cdec";
die "Can't find decoder in $cdec" unless -x $cdec;
die "Can't find $parallelize" unless -x $parallelize;
die "Can't find $libcall" unless -e $libcall;
my $decoder = $cdec;
my $jobs = $default_jobs;   # number of decode nodes
my $pmem = "9g";
my $help = 0;
my $config;
my $test_set;
my $weights;
my $use_make = 1;
my $useqsub;
my $cpbin=1;
my $base_dir;
# Process command-line options
if (GetOptions(
	"jobs=i" => \$jobs,
	"help" => \$help,
	"qsub" => \$useqsub,
	"input=s" => \$test_set,
        "config=s" => \$config,
	"weights=s" => \$weights,
        "dir=s" => \$base_dir,
) == 0 || @ARGV!=0 || $help) {
	print_help();
	exit;
}

if ($useqsub) {
  $use_make = 0;
  die "LocalEnvironment.pm does not have qsub configuration for this host. Cannot run with --qsub!\n" unless has_qsub();
}

my @missing_args = ();

if (!defined $test_set) { push @missing_args, "--input"; }
if (!defined $config) { push @missing_args, "--config"; }
if (!defined $weights) { push @missing_args, "--weights"; }
die "Please specify missing arguments: " . join (', ', @missing_args) . "\nUse --help for more information.\n" if (@missing_args);

my @tf = localtime(time);
my $tname = basename($test_set);
$tname =~ s/\.(sgm|sgml|xml)$//i;
my $dir = "eval.$tname." . sprintf('%d%02d%02d-%02d%02d%02d', 1900+$tf[5], $tf[4], $tf[3], $tf[2], $tf[1], $tf[0]);
if ($base_dir) {
  $dir = $base_dir.'/'.$dir
}
my $time = unchecked_output("date");

check_call("mkdir -p $dir");

split_devset($test_set, "$dir/test.input.raw", "$dir/test.refs");
my $refs = "-r $dir/test.refs";
my $newsrc = "$dir/test.input";
enseg("$dir/test.input.raw", $newsrc);
my $src_file = $newsrc;
open F, "<$src_file" or die "Can't read $src_file: $!"; close F;

my $test_trans="$dir/test.trans";
my $logdir="$dir/logs";
my $decoderLog="$logdir/decoder.sentserver.log";
check_call("mkdir -p $logdir");

#decode
print STDERR "RUNNING DECODER AT ";
print STDERR unchecked_output("date");
my $decoder_cmd = "$decoder -c $config --weights $weights";
my $pcmd;
if ($use_make) {
	$pcmd = "cat $src_file | $parallelize --workdir $dir --use-fork -p $pmem -e $logdir -j $jobs --";
} else {
	$pcmd = "cat $src_file | $parallelize --workdir $dir -p $pmem -e $logdir -j $jobs --";
}
my $cmd = "$pcmd $decoder_cmd 2> $decoderLog 1> $test_trans";
check_bash_call($cmd);
print STDERR "DECODER COMPLETED AT ";
print STDERR unchecked_output("date");
print STDERR "\nOUTPUT: $test_trans\n\n";
my $bleu = check_output("cat $test_trans | $SCORER $refs -m ibm_bleu");
chomp $bleu;
print STDERR "BLEU: $bleu\n";
print STDOUT "BLEU: $bleu\n";
my $ter = check_output("cat $test_trans | $SCORER $refs -m ter");
chomp $ter;
print STDERR " TER: $ter\n";
open TR, ">$dir/test.scores" or die "Can't write $dir/test.scores: $!";
my $score_report = <<EOT;
### SCORE REPORT #############################################################
        OUTPUT=$test_trans
  SCRIPT INPUT=$test_set
 DECODER INPUT=$src_file
    REFERENCES=$dir/test.refs
------------------------------------------------------------------------------
          BLEU=$bleu
           TER=$ter
##############################################################################
EOT

print TR $score_report;
print STDOUT $score_report;
close TR;
my $sr = unchecked_output("cat $dir/test.scores");
print STDERR "\n\n$sr\n(A copy of this report can be found in $dir/test.scores)\n\n";
exit 0;

sub enseg {
	my $src = shift;
	my $newsrc = shift;
	open(SRC, $src);
	open(NEWSRC, ">$newsrc");
	my $i=0;
	while (my $line=<SRC>){
		chomp $line;
		if ($line =~ /^\s*<seg/i) {
		    if($line =~ /id="[0-9]+"/) {
			print NEWSRC "$line\n";
		    } else {
			die "When using segments with pre-generated <seg> tags, you must include a zero-based id attribute";
		    }
		} else {
			print NEWSRC "<seg id=\"$i\">$line</seg>\n";
		}
		$i++;
	}
	close SRC;
	close NEWSRC;
}

sub print_help {
	my $executable = basename($0); chomp $executable;
	print << "Help";

Usage: $executable [options] <ini file>

	$executable --config cdec.ini --weights weights.txt [--jobs N] [--qsub] <testset.in-ref>

Options:

	--help
		Print this message and exit.

	--config <file>
		A path to the cdec.ini file.

	--weights <file>
		A file specifying feature weights.

	--dir <dir>
		Base directory where directory with evaluation results
                will be located.

Job control options:

	--jobs <I>
		Number of decoder processes to run in parallel. [default=$default_jobs]

	--qsub
		Use qsub to run jobs in parallel (qsub must be configured in
		environment/LocalEnvironment.pm)

	--pmem <N>
		Amount of physical memory requested for parallel decoding jobs
		(used with qsub requests only)

Help
}

sub convert {
  my ($str) = @_;
  my @ps = split /;/, $str;
  my %dict = ();
  for my $p (@ps) {
    my ($k, $v) = split /=/, $p;
    $dict{$k} = $v;
  }
  return %dict;
}



sub cmdline {
    return join ' ',($0,@ORIG_ARGV);
}

#buggy: last arg gets quoted sometimes?
my $is_shell_special=qr{[ \t\n\\><|&;"'`~*?{}$!()]};
my $shell_escape_in_quote=qr{[\\"\$`!]};

sub escape_shell {
    my ($arg)=@_;
    return undef unless defined $arg;
    if ($arg =~ /$is_shell_special/) {
        $arg =~ s/($shell_escape_in_quote)/\\$1/g;
        return "\"$arg\"";
    }
    return $arg;
}

sub escaped_shell_args {
    return map {local $_=$_;chomp;escape_shell($_)} @_;
}

sub escaped_shell_args_str {
    return join ' ',&escaped_shell_args(@_);
}

sub escaped_cmdline {
    return "$0 ".&escaped_shell_args_str(@ORIG_ARGV);
}

sub split_devset {
  my ($infile, $outsrc, $outref) = @_;
  open F, "<$infile" or die "Can't read $infile: $!";
  open S, ">$outsrc" or die "Can't write $outsrc: $!";
  open R, ">$outref" or die "Can't write $outref: $!";
  while(<F>) {
    chomp;
    my ($src, @refs) = split /\s*\|\|\|\s*/;
    die "Malformed devset line: $_\n" unless scalar @refs > 0;
    print S "$src\n";
    print R join(' ||| ', @refs) . "\n";
  }
  close R;
  close S;
  close F;
}