summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-11-15 21:36:24 +0000
committerredpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f>2010-11-15 21:36:24 +0000
commit4be482464302a5118ec246b10a723f934830a6d7 (patch)
treef7fd6dbf6e755909bd02aecd88558626f5b5aac6
parentd6d06b9c2d81adcaededab27c5e022e4718dd4c6 (diff)
cdec2zmert script
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@727 ec762483-ff6d-05da-a07a-a48fb63a330f
-rwxr-xr-xrescore/cdec_kbest_to_zmert.pl63
1 files changed, 63 insertions, 0 deletions
diff --git a/rescore/cdec_kbest_to_zmert.pl b/rescore/cdec_kbest_to_zmert.pl
new file mode 100755
index 00000000..02808572
--- /dev/null
+++ b/rescore/cdec_kbest_to_zmert.pl
@@ -0,0 +1,63 @@
+#!/usr/bin/perl -w
+
+use strict;
+use utf8;
+use Getopt::Long;
+
+my ($feature_file, $hyp_file, $help);
+# Exact option names only (no abbreviations); -h is the hypothesis file, not help.
+Getopt::Long::Configure("no_auto_abbrev");
+my $parsed_ok = GetOptions(
+  "feature_file|f=s"    => \$feature_file,
+  "hypothesis_file|h=s" => \$hyp_file,
+  "help"                => \$help,
+);
+# A parse error, a stray argument, --help, or a missing file name prints usage and exits 1.
+if (!$parsed_ok || @ARGV != 0 || $help || !$feature_file || !$hyp_file) {
+  usage();
+  exit(1);
+}
+
+# Collect the feature names listed in the weights file; the weight values are ignored.
+open my $weights_fh, '<', $feature_file or die "Can't read $feature_file: $!";
+my %weights;
+while (my $line = <$weights_fh>) {
+  next if $line =~ /^\s*(?:#|$)/;    # skip comments (even indented) and blank lines
+  my ($fname) = split ' ', $line;    # ' ' skips leading whitespace: no empty name
+  $weights{$fname} = 1;              # the hash is used as a set of known names
+}
+close $weights_fh;
+# The sorted names fix the column order of every output line.
+my @all_feats = sort keys %weights;
+
+# Rewrite each k-best line "id ||| hyp ||| name=value ..." as "id ||| hyp ||| v1 v2 ...".
+open my $hyp_fh, '<', $hyp_file or die "Can't read $hyp_file: $!";
+while (my $line = <$hyp_fh>) {
+  chomp $line;
+  next unless $line =~ /\S/;    # tolerate blank lines
+  my ($id, $hyp, $feats) = split / \|\|\| /, $line;
+  unless (defined $feats) {     # fewer than three fields: nothing to convert
+    warn "Skipping malformed line $. of $hyp_file\n";
+    next;
+  }
+  my %fvaldict;
+  for my $featpair (split ' ', $feats) {    # ' ' also ignores leading whitespace
+    my ($fname, $fval) = split /=/, $featpair;
+    $fvaldict{$fname} = $fval;
+    # Warn once per unknown feature; marking it as seen silences later repeats.
+    warn "Feature '$fname' not mentioned in feature file $feature_file" unless defined $weights{$fname};
+    $weights{$fname} = 1;
+  }
+  # One value per entry of @all_feats, in that order; features absent here become 0.0.
+  my @trans = map { defined $fvaldict{$_} ? $fvaldict{$_} : '0.0' } @all_feats;
+  print "$id ||| $hyp ||| @trans\n";
+}
+close $hyp_fh;
+
+# Print a two-line usage summary on STDOUT.
+# $0 is interpolated, so the message shows the name the script was invoked as.
+sub usage {
+  print "Usage: $0 -f feature-file.txt/weights.txt -h hyp.nbest.txt\n"
+      . " Puts a cdec k-best list into Joshua/ZMERT format\n";
+}
+