#!/usr/bin/perl -w
#
# cdec_kbest_to_zmert.pl
#
# Provenance (taken from the patch header this file was recovered from):
#   commit:     4be482464302a5118ec246b10a723f934830a6d7
#   From:       redpony
#   Date:       Mon, 15 Nov 2010 21:36:24 +0000
#   Subject:    cdec2zmert script
#   git-svn-id: https://ws10smt.googlecode.com/svn/trunk@727 ec762483-ff6d-05da-a07a-a48fb63a330f
#   path:       rescore/cdec_kbest_to_zmert.pl (new file, mode 100755, 63 insertions)
#
# Purpose:
#   Reads a feature file (one feature name per line, first whitespace-separated
#   token; '#' lines and blank lines are ignored) and a hypothesis file whose
#   lines have the form
#       id ||| hypothesis ||| Name1=val1 Name2=val2 ...
#   and prints, for every hypothesis line,
#       id ||| hypothesis ||| v1 v2 ... vN
#   where the values are listed in the sorted order of the feature names from
#   the feature file. A feature that does not occur on a line is written as
#   '0.0'. Judging by the script name, the output is meant for Z-MERT.
#
# Options (see the GetOptions call below):
#   --feature_file|-f FILE      feature file (required)
#   --hypothesis_file|-h FILE   k-best hypothesis file (required)
#   --help                      print usage and exit
#
# Exit status: 1 when the options are invalid or --help is given; dies if an
# input file cannot be opened.

use strict;
use utf8;
use Getopt::Long;

my $feature_file;
my $hyp_file;
my $help;

# Abbreviations are disabled so that -h is always the hypothesis file and can
# never be taken as a shortened --help.
Getopt::Long::Configure("no_auto_abbrev");
if (GetOptions(
      "feature_file|f=s"    => \$feature_file,
      "hypothesis_file|h=s" => \$hyp_file,
      "help"                => \$help,
    ) == 0 || @ARGV != 0 || $help || !$feature_file || !$hyp_file) {
  usage();
  exit(1);
}

# --- Pass 1: collect the feature names that define the output columns. ------
# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle is scoped to this file.
open my $feat_fh, '<', $feature_file or die "Can't read $feature_file: $!";
my %weights;    # feature name => 1 (used as a "seen" set; the weight value itself is not needed)
while (my $line = <$feat_fh>) {
  chomp $line;
  next if $line =~ /^#/;
  next if $line =~ /^\s*$/;
  # split ' ' skips leading whitespace, so an indented line still yields the
  # feature name rather than an empty first field.
  my ($fname) = split ' ', $line;
  $weights{$fname} = 1;
}
close $feat_fh;

# The column order is fixed here, before any hypothesis is read: features that
# only show up in the hypothesis file are warned about but never printed.
my @all_feats = sort keys %weights;

# --- Pass 2: rewrite every hypothesis line. ---------------------------------
open my $hyp_fh, '<', $hyp_file or die "Can't read $hyp_file: $!";
while (my $line = <$hyp_fh>) {
  chomp $line;
  # Any fields after the third are dropped by the list assignment.
  my ($id, $hyp, $feats) = split / \|\|\| /, $line;
  unless (defined $id && defined $hyp && defined $feats) {
    # Say what is wrong instead of emitting a string of "uninitialized value"
    # warnings; the line is still written, with the missing fields left empty.
    warn "Malformed line $. in $hyp_file (expected 'id ||| hyp ||| feats')\n";
    for my $field ($id, $hyp, $feats) {
      $field = '' unless defined $field;
    }
  }

  my %fvaldict;    # feature name => value string found on this line
  for my $featpair (split ' ', $feats) {
    my ($fname, $fval) = split /=/, $featpair;
    next unless defined $fname;    # token such as "=" carries no feature name
    $fvaldict{$fname} = $fval;
    unless (defined $weights{$fname}) {
      warn "Feature '$fname' not mentioned in feature file $feature_file";
      # Remember the name so the warning is issued only once per feature.
      $weights{$fname} = 1;
    }
  }

  # One value per known feature, in @all_feats order; absent (or value-less)
  # features are written as '0.0'.
  my @trans = map { defined $fvaldict{$_} ? $fvaldict{$_} : '0.0' } @all_feats;
  print "$id ||| $hyp ||| @trans\n";
}
close $hyp_fh;

# NOTE(review): the definition below is cut off in the recovered source -- the
# heredoc operator and the usage text after "print <" are missing. It is kept
# exactly as found rather than guessed at; restore it from the original commit.
sub usage {
  print <