From e1eae0ac941aa76528d4673dbd35f214cdac23fb Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Thu, 2 Feb 2012 17:19:40 -0500 Subject: remove some dead code to clean things up --- rescore/cdec_kbest_to_zmert.pl | 64 ------------- rescore/example/README | 4 - rescore/example/cdec.ini | 2 - rescore/example/hyp.txt | 5 - rescore/example/small.scfg | 9 -- rescore/example/source.txt | 2 - rescore/example/weights | 1 - rescore/generate_zmert_params_from_weights.pl | 26 ------ rescore/rerank.pl | 86 ------------------ rescore/rescore_inv_model1.pl | 126 -------------------------- rescore/rescore_with_cdec_model.pl | 121 ------------------------- 11 files changed, 446 deletions(-) delete mode 100755 rescore/cdec_kbest_to_zmert.pl delete mode 100644 rescore/example/README delete mode 100644 rescore/example/cdec.ini delete mode 100644 rescore/example/hyp.txt delete mode 100644 rescore/example/small.scfg delete mode 100644 rescore/example/source.txt delete mode 100644 rescore/example/weights delete mode 100755 rescore/generate_zmert_params_from_weights.pl delete mode 100755 rescore/rerank.pl delete mode 100755 rescore/rescore_inv_model1.pl delete mode 100755 rescore/rescore_with_cdec_model.pl (limited to 'rescore') diff --git a/rescore/cdec_kbest_to_zmert.pl b/rescore/cdec_kbest_to_zmert.pl deleted file mode 100755 index 88bc9682..00000000 --- a/rescore/cdec_kbest_to_zmert.pl +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use utf8; -use Getopt::Long; - -my $feature_file; -my $hyp_file; -my $help; - -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "feature_file|f=s" => \$feature_file, - "hypothesis_file|h=s" => \$hyp_file, - "help" => \$help, -) == 0 || @ARGV!=0 || $help || !$feature_file || !$hyp_file) { - usage(); - exit(1); -} - -open W, "<$feature_file" or die "Can't read $feature_file: $!"; -my %weights; -my @all_feats; -while() { - chomp; - next if /^#/; - next if /^\s*$/; - my ($fname, $w) = split /\s+/; - push @all_feats, $fname; - $weights{$fname} = 1; -} -close W; - -open HYP, "<$hyp_file" or die "Can't read $hyp_file: $!"; -while() { - chomp; - my ($id, $hyp, $feats) = split / \|\|\| /; - my @afeats = split /\s+/, $feats; - my $tot = 0; - my %fvaldict; - for my $featpair (@afeats) { - my ($fname,$fval) = split /=/, $featpair; - $fvaldict{$fname} = $fval; - my $weight = $weights{$fname}; - warn "Feature '$fname' not mentioned in feature file $feature_file" unless defined $weight; - $weights{$fname} = 1; - } - my @trans; - for my $feat (@all_feats) { - my $v = $fvaldict{$feat}; - if (!defined $v) { $v = '0.0'; } - push @trans, $v; - } - print "$id ||| $hyp ||| @trans\n"; -} -close HYP; - -sub usage { - print <) { - next if /^#/; - chomp; - next if /^\s*$/; - s/^\s+//; - s/\s+$//; - my ($a,$b) = split /\s+/; - next unless ($a && $b); - my $line = $DEFAULT; - if ($defaults{$a}) { $line = $defaults{$a}; } - print "$a\t|||\t$b\t$line\n"; -} - -print "normalization = none\n"; - diff --git a/rescore/rerank.pl b/rescore/rerank.pl deleted file mode 100755 index 4a0c5750..00000000 --- a/rescore/rerank.pl +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use utf8; -use Getopt::Long; - -my $weights_file; -my $hyp_file; -my $help; -my $kbest; # flag to extract reranked list - -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "weights_file|w=s" => \$weights_file, - "hypothesis_file|h=s" => \$hyp_file, - "kbest" => \$kbest, - "help" => \$help, -) == 0 || @ARGV!=0 || $help || !$weights_file || !$hyp_file) { - usage(); - exit(1); -} - -open W, "<$weights_file" or die "Can't read $weights_file: $!"; -my %weights; -while() { - chomp; - next if /^#/; - next if /^\s*$/; - my ($fname, $w) = split /\s+/; - $weights{$fname} = $w; -} -close W; - -my $cur = undef; -my %hyps = (); -open HYP, "<$hyp_file" or die "Can't read $hyp_file: $!"; -while() { - chomp; - my ($id, $hyp, $feats) = split / \|\|\| /; - unless (defined $cur) { $cur = $id; } - if ($cur ne $id) { - extract_1best($cur, \%hyps); - $cur = $id; - %hyps = (); - } - my @afeats = split /\s+/, $feats; - my $tot = 0; - for my $featpair (@afeats) { - my ($fname,$fval) = split /=/, $featpair; - my $weight = $weights{$fname}; - die "Unweighted feature '$fname'" unless defined $weight; - $tot += ($weight * $fval); - } - $hyps{"$hyp ||| $feats"} = $tot; -} -extract_1best($cur, \%hyps) if defined $cur; -close HYP; - -sub extract_1best { - my ($id, $rh) = @_; - my %hyps = %$rh; - if ($kbest) { - for my $hyp (sort { $hyps{$b} <=> $hyps{$a} } keys %hyps) { - print "$id ||| $hyp\n"; - } - } else { - my $best_score = undef; - my $best_hyp = undef; - for my $hyp (keys %hyps) { - if (!defined $best_score || $hyps{$hyp} > $best_score) { - $best_score = $hyps{$hyp}; - $best_hyp = $hyp; - } - } - $best_hyp =~ s/ \|\|\|.*$//; - print "$best_hyp\n"; - } -} - -sub usage { - print < \$model_file, - "source_file|s=s" => \$src_file, - "feature_name|f=s" => \$feature_name, - "hypothesis_file|h=s" => \$hyp_file, - "help" => \$help, -) == 0 || @ARGV!=0 || $help || !$model_file || !$src_file || !$hyp_file) { - usage(); - exit; -} - -binmode STDIN, ":utf8"; -binmode STDOUT, ":utf8"; -binmode STDERR, ":utf8"; - -print STDERR "Reading Model 1 probabilities from $model_file...\n"; -open M, "<$model_file" or die "Couldn't read $model_file: $!"; -binmode M, ":utf8"; -my %m1; -while(){ - chomp; - my ($e,$f,$lp) = split /\s+/; - die unless defined $e; - die unless defined $f; - die unless defined $lp; - $m1{$f}->{$e} = $lp; -} -close M; - -open SRC, "<$src_file" or die "Can't read $src_file: $!"; -open HYP, "<$hyp_file" or die "Can't read $hyp_file: $!"; -binmode(SRC,":utf8"); -binmode(HYP,":utf8"); -binmode(STDOUT,":utf8"); -my @source; while(){chomp; push @source, $_; } -close SRC; -my $src_len = scalar @source; -print STDERR "Read $src_len sentences...\n"; -print STDERR "Rescoring...\n"; - -my $cur = undef; -my @hyps = (); -my @feats = (); -while() { - chomp; - my ($id, $hyp, $feats) = split / \|\|\| /; - unless (defined $cur) { $cur = $id; } - die "sentence ids in k-best list file must be between 0 and $src_len" if $id < 0 || $id > $src_len; - if ($cur ne $id) { - rescore($cur, $source[$cur], \@hyps, \@feats); - $cur = $id; - @hyps = (); - @feats = (); - } - push @hyps, $hyp; - push @feats, $feats; -} -rescore($cur, $source[$cur], \@hyps, \@feats) if defined $cur; - -sub rescore { - my ($id, $src, $rh, $rf) = @_; - my @hyps = @$rh; - my @feats = @$rf; - my $nhyps = scalar @hyps; - my %cache = (); - print STDERR "RESCORING SENTENCE id=$id (# hypotheses=$nhyps)...\n"; - for (my $i=0; $i < $nhyps; $i++) { - my $score = $cache{$hyps[$i]}; - if (!defined $score) { - if ($reverse_model) { - die "not implemented"; - } else { - $score = m1_prob($src, $hyps[$i]); - } - $cache{$hyps[$i]} = $score; - } - print "$id ||| $hyps[$i] ||| $feats[$i] $feature_name=$score\n"; - } - -} - -sub m1_prob { - my ($fsent, $esent) = @_; - die unless defined $fsent; - die unless defined $esent; - my @fwords = split /\s+/, $fsent; - my @ewords = split /\s+/, $esent; - push @ewords, ""; - my $tp = 0; - for my $f (@fwords) { - my $m1f = $m1{$f}; - if (!defined $m1f) { $m1f = {}; } - my $tfp = 0; - for my $e (@ewords) { - my $lp = $m1f->{$e}; - if (!defined $lp) { $lp = -100; } - #print "P($f|$e) = $lp\n"; - my $prob = exp($lp); - #if ($prob > $tfp) { $tfp = $prob; } - $tfp += $prob; - } - $tp += log($tfp); - $tp -= log(scalar @ewords); # uniform probability of each generating word - } - return $tp; -} - -sub usage { - print STDERR "Usage: $0 -m model_file.txt -h hypothesis.nbest -s source.txt\n Adds the back-translation probability under Model 1\n Use training/model1 to generate the required parameter file\n"; -} - - diff --git a/rescore/rescore_with_cdec_model.pl b/rescore/rescore_with_cdec_model.pl deleted file mode 100755 index cdd8c217..00000000 --- a/rescore/rescore_with_cdec_model.pl +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use utf8; -my @ORIG_ARGV=@ARGV; -use Cwd qw(getcwd); -my $SCRIPT_DIR; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $SCRIPT_DIR = dirname(abs_path($0)); push @INC, $SCRIPT_DIR, "$SCRIPT_DIR/../environment"; } -use LocalConfig; -use Getopt::Long; -use IPC::Open2; -use POSIX ":sys_wait_h"; - -my $decoder = "$SCRIPT_DIR/../decoder/cdec"; -my $help; -my $cdec_ini; -my $src_file; -my $hyp_file; -my $reverse_model; -my $weights_file; -my $feature_name='NewModel'; - -sub catch_pipe { - my $signame = shift; - die "$0 received SIGPIPE: did the decoder die?\n"; -} -$SIG{PIPE} = \&catch_pipe; - -Getopt::Long::Configure("no_auto_abbrev"); -if (GetOptions( - "config|c=s" => \$cdec_ini, - "weights|w=s" => \$weights_file, - "source_file|s=s" => \$src_file, - "feature_name|f=s" => \$feature_name, - "hypothesis_file|h=s" => \$hyp_file, - "reverse" => \$reverse_model, # if true translate hyp -> src - "decoder=s" => \$decoder, - "help" => \$help, -) == 0 || @ARGV!=0 || $help || !$cdec_ini || !$src_file || !$hyp_file) { - usage(); - exit; -} -die "Can't find $decoder" unless -f $decoder; -die "Can't run $decoder" unless -x $decoder; -my $weights = ''; -if (defined $weights_file) { - die "Can't read $weights_file" unless -f $weights_file; - $weights = "-w $weights_file"; -} -my $decoder_command = "$decoder -c $cdec_ini --quiet $weights --show_conditional_prob"; -print STDERR "DECODER COMMAND: $decoder_command\n"; -my $cdec_pid = open2(\*CDEC_IN, \*CDEC_OUT, $decoder_command) - or die "Couldn't run $decoder: $!"; -sleep 1; - -die "Can't find $cdec_ini" unless -f $cdec_ini; -open SRC, "<$src_file" or die "Can't read $src_file: $!"; -open HYP, "<$hyp_file" or die "Can't read $hyp_file: $!"; -binmode(SRC,":utf8"); -binmode(HYP,":utf8"); -binmode(STDOUT,":utf8"); -my @source; while(){chomp; push @source, $_; } -close SRC; -my $src_len = scalar @source; -print STDERR "Read $src_len sentences...\n"; -binmode(CDEC_IN, ":utf8"); -binmode(CDEC_OUT, ":utf8"); - -my $cur = undef; -my @hyps = (); -my @feats = (); -while() { - chomp; - my ($id, $hyp, $feats) = split / \|\|\| /; - unless (defined $cur) { $cur = $id; } - die "sentence ids in k-best list file must be between 0 and $src_len" if $id < 0 || $id > $src_len; - if ($cur ne $id) { - rescore($cur, $source[$cur], \@hyps, \@feats); - $cur = $id; - @hyps = (); - @feats = (); - } - push @hyps, $hyp; - push @feats, $feats; -} -rescore($cur, $source[$cur], \@hyps, \@feats) if defined $cur; - -close CDEC_IN; -close CDEC_OUT; -close HYP; -waitpid($cdec_pid, 0); -my $status = $? >> 8; -if ($status != 0) { - print STDERR "Decoder returned bad status!\n"; -} - -sub rescore { - my ($id, $src, $rh, $rf) = @_; - my @hyps = @$rh; - my @feats = @$rf; - my $nhyps = scalar @hyps; - print STDERR "RESCORING SENTENCE id=$id (# hypotheses=$nhyps)...\n"; - for (my $i=0; $i < $nhyps; $i++) { - if ($reverse_model) { - print CDEC_OUT "$hyps[$i] ||| $src\n"; - } else { - print CDEC_OUT "$src ||| $hyps[$i]\n"; - } - my $score = ; - chomp $score; - my @words = split /\s+/, $hyps[$i]; - print "$id ||| $hyps[$i] ||| $feats[$i] $feature_name=$score\n"; - } -} - -sub usage { - print <