From fd5f83ba7c24d041e8378d6bbfb4325f20eb0dd7 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 11 Nov 2012 21:38:33 -0500 Subject: switch to new multi-reference format for mert --- dpmert/dpmert.pl | 56 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 18 deletions(-) (limited to 'dpmert') diff --git a/dpmert/dpmert.pl b/dpmert/dpmert.pl index 52ce0fc0..2e6a9728 100755 --- a/dpmert/dpmert.pl +++ b/dpmert/dpmert.pl @@ -67,6 +67,7 @@ my $bleu_weight=1; my $use_make = 1; # use make to parallelize line search my $useqsub; my $pass_suffix = ''; +my $devset = ''; my $cpbin=1; # Process command-line options Getopt::Long::Configure("no_auto_abbrev"); @@ -85,12 +86,13 @@ if (GetOptions( "pmem=s" => \$pmem, "cpbin!" => \$cpbin, "random-directions=i" => \$rand_directions, + "devset=s" => \$devset, "ref-files=s" => \$refFiles, "metric=s" => \$metric, "source-file=s" => \$srcFile, "weights=s" => \$initialWeights, "workdir=s" => \$dir, - "opt-iterations=i" => \$optimization_iters, + "opt-iterations=i" => \$optimization_iters, ) == 0 || @ARGV!=1 || $help) { print_help(); exit; @@ -102,8 +104,17 @@ if ($useqsub) { } my @missing_args = (); -if (!defined $srcFile) { push @missing_args, "--source-file"; } -if (!defined $refFiles) { push @missing_args, "--ref-files"; } +if (defined $srcFile || defined $refFiles) { + die <$script_file" or die "Can't write $script_file: $!"; @@ -490,14 +501,6 @@ sub get_lines { return $lc; } -sub get_comma_sep_refs { - my ($r,$p) = @_; - my $o = check_output("echo $p"); - chomp $o; - my @files = split /\s+/, $o; - return "-$r " . 
join(" -$r ", @files); -} - sub read_weights_file { my ($file) = @_; open F, "<$file" or die "Couldn't read $file: $!"; @@ -530,8 +533,7 @@ sub write_config { print $fh "DECODER: $decoder\n"; print $fh "INI FILE: $iniFile\n"; print $fh "WORKING DIR: $dir\n"; - print $fh "SOURCE (DEV): $srcFile\n"; - print $fh "REFS (DEV): $refFiles\n"; + print $fh "DEVSET: $devset\n"; print $fh "EVAL METRIC: $metric\n"; print $fh "START ITERATION: $iteration\n"; print $fh "MAX ITERATIONS: $max_iterations\n"; @@ -698,3 +700,21 @@ sub escaped_shell_args_str { sub escaped_cmdline { return "$0 ".&escaped_shell_args_str(@ORIG_ARGV); } + +sub split_devset { + my ($infile, $outsrc, $outref) = @_; + open F, "<$infile" or die "Can't read $infile: $!"; + open S, ">$outsrc" or die "Can't write $outsrc: $!"; + open R, ">$outref" or die "Can't write $outref: $!"; + while(<F>) { + chomp; + my ($src, @refs) = split /\s*\|\|\|\s*/; + die "Malformed devset line: $_\n" unless scalar @refs > 0; + print S "$src\n"; + print R join(' ||| ', @refs) . "\n"; + } + close R; + close S; + close F; +} + -- cgit v1.2.3