diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-11-15 21:36:24 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-11-15 21:36:24 +0000 |
commit | 2b869351007ad0b0c208dae91018aafc7e039402 (patch) | |
tree | 9b0b79112060813a46919c83a813a98b6d4fabe0 | |
parent | c8c315a4f78c464636ea5e3fd9a11416b2f966b9 (diff) |
cdec2zmert script
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@727 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-x | rescore/cdec_kbest_to_zmert.pl | 63 |
1 files changed, 63 insertions, 0 deletions
#!/usr/bin/perl -w
#
# rescore/cdec_kbest_to_zmert.pl
#
# Converts a cdec k-best list into the Joshua/ZMERT n-best format.
#
# Input (hypothesis file), one hypothesis per line:
#     id ||| hypothesis ||| Feat1=val1 Feat2=val2 ...[ ||| anything else]
# Output (STDOUT), one line per hypothesis:
#     id ||| hypothesis ||| val_a val_b ...
# where the values are listed in the sorted order of the feature names found
# in the feature/weights file. Features missing from a hypothesis are written
# as 0.0; features that appear in a hypothesis but not in the feature file
# are dropped from the output (with a one-time warning per feature name).

use strict;
use utf8;
use Getopt::Long;

my $feature_file;
my $hyp_file;
my $help;

# no_auto_abbrev: "-h" must mean --hypothesis_file only, never an
# abbreviation of --help.
Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
    "feature_file|f=s"    => \$feature_file,
    "hypothesis_file|h=s" => \$hyp_file,
    "help"                => \$help,
) == 0 || @ARGV != 0 || $help || !$feature_file || !$hyp_file) {
    usage();
    exit(1);
}

# Collect the feature names. Only the first whitespace-separated token of
# each line is used; the weight value itself is irrelevant for conversion.
open my $weights_fh, '<', $feature_file
    or die "Can't read $feature_file: $!";
my %weights;
while (my $line = <$weights_fh>) {
    chomp $line;
    next if $line =~ /^\s*#/;     # comment line (possibly indented)
    next if $line =~ /^\s*$/;     # blank line
    # split ' ' (awk mode) ignores leading whitespace, so an indented entry
    # does not produce an empty feature name.
    my ($fname) = split ' ', $line;
    $weights{$fname} = 1;
}
close $weights_fh;

# Fixed column order for every output line.
my @all_feats = sort keys %weights;

open my $hyp_fh, '<', $hyp_file
    or die "Can't read $hyp_file: $!";
while (my $line = <$hyp_fh>) {
    chomp $line;
    my ($id, $hyp, $feats) = split / \|\|\| /, $line;
    unless (defined $feats) {
        # Fewer than three ' ||| '-separated fields: nothing to convert.
        warn "Skipping malformed line $. of $hyp_file\n";
        next;
    }

    my %fvaldict;
    for my $featpair (split ' ', $feats) {
        my ($fname, $fval) = split /=/, $featpair;
        $fvaldict{$fname} = $fval;
        next if exists $weights{$fname};
        warn "Feature '$fname' not mentioned in feature file $feature_file";
        # Remember the name so the warning is issued only once per feature.
        # @all_feats was computed above, so this does not add a column.
        $weights{$fname} = 1;
    }

    # One value per known feature, defaulting to 0.0 when the hypothesis
    # does not mention it.
    my @trans = map { defined $fvaldict{$_} ? $fvaldict{$_} : '0.0' } @all_feats;
    print "$id ||| $hyp ||| @trans\n";
}
close $hyp_fh;

# Prints a short usage message to STDOUT.
sub usage {
    print <<EOT;
Usage: $0 -f feature-file.txt/weights.txt -h hyp.nbest.txt
  Puts a cdec k-best list into Joshua/ZMERT format
EOT
}