From da176941c1f481f14e93bd7d055cc29cac0ea8c8 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sun, 12 Aug 2012 23:33:21 -0400 Subject: use new union api --- extools/coarsen_grammar.pl | 133 --------------------------------------------- 1 file changed, 133 deletions(-) delete mode 100755 extools/coarsen_grammar.pl (limited to 'extools/coarsen_grammar.pl') diff --git a/extools/coarsen_grammar.pl b/extools/coarsen_grammar.pl deleted file mode 100755 index f2dd6689..00000000 --- a/extools/coarsen_grammar.pl +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/perl - -# dumb grammar coarsener that maps every nonterminal to X (except S). - -use strict; - -unless (@ARGV > 1){ - die "Usage: $0 [ ... ] \n"; -} -my $weight_file = shift @ARGV; - -$ENV{"LC_ALL"} = "C"; -local(*GRAMMAR, *OUT_GRAMMAR, *WEIGHTS); - -my %weights; -unless (open(WEIGHTS, $weight_file)) {die "Could not open weight file $weight_file\n" } -while (){ - if (/(.+) (.+)$/){ - $weights{$1} = $2; - } -} -close(WEIGHTS); -unless (keys(%weights)){ - die "Could not find any PhraseModel features in weight file (perhaps you specified the wrong file?)\n\n". - "Usage: $0 [ ... ] \n"; -} - -sub cleanup_and_die; -$SIG{INT} = "cleanup_and_die"; -$SIG{TERM} = "cleanup_and_die"; -$SIG{HUP} = "cleanup_and_die"; - -open(OUT_GRAMMAR, ">grammar.tmp"); -while (my $grammar_file = shift @ARGV){ - unless (open(GRAMMAR, $grammar_file)) {die "Could not open grammar file $grammar_file\n"} - while (){ - if (/^((.*\|{3}){3})(.*)$/){ - my $rule = $1; - my $rest = $3; - my $coarse_rule = $rule; - $coarse_rule =~ s/\[X[^\],]*/[X/g; - print OUT_GRAMMAR "$coarse_rule $rule $rest\n"; - } else { - die "Unrecognized rule format: $_\n"; - } - } - close(GRAMMAR); -} -close(OUT_GRAMMAR); - -`sort grammar.tmp > grammar.tmp.sorted`; -sub dump_rules; -sub compute_score; -unless (open(GRAMMAR, "grammar.tmp.sorted")){ die "Something went wrong; could not open intermediate file grammar.tmp.sorted\n"}; -my $prev_coarse_rule = ""; -my $best_features = ""; -my $best_score = 0; -my @rules = (); -while (){ - if (/^\s*((\S.*\|{3}\s*){3})((\S.*\|{3}\s*){3})(.*)$/){ - my $coarse_rule = $1; - my $fine_rule = $3; - my $features = $5; # This code does not correctly handle rules with other info (e.g. alignments) - if ($coarse_rule eq $prev_coarse_rule){ - my $score = compute_score($features, %weights); - if ($score > $best_score){ - $best_score = $score; - $best_features = $features; - } - } else { - dump_rules($prev_coarse_rule, $best_features, @rules); - $prev_coarse_rule = $coarse_rule; - $best_features = $features; - $best_score = compute_score($features, %weights); - @rules = (); - } - push(@rules, "$fine_rule$features\n"); - } else { - die "Something went wrong during grammar projection: $_\n"; - } -} -dump_rules($prev_coarse_rule, $best_features, @rules); -close(GRAMMAR); -cleanup(); - -sub compute_score { - my($features, %weights) = @_; - my $score = 0; - if ($features =~ s/^\s*(\S.*\S)\s*$/$1/) { - my @features = split(/\s+/, $features); - my $pm=0; - for my $feature (@features) { - my $feature_name; - my $feature_val; - if ($feature =~ /(.*)=(.*)/){ - $feature_name = $1; - $feature_val= $2; - } else { - $feature_name = "PhraseModel_" . $pm; - $feature_val= $feature; - } - $pm++; - if ($weights{$feature_name}){ - $score += $weights{$feature_name} * $feature_val; - } - } - } else { - die "Unexpected feature value format: $features\n"; - } - return $score; -} - -sub dump_rules { - my($coarse_rule, $coarse_rule_scores, @fine_rules) = @_; - unless($coarse_rule){ return; } - print "$coarse_rule $coarse_rule_scores\n"; - for my $rule (@fine_rules){ - print "\t$rule"; - } -} - -sub cleanup_and_die { - cleanup(); - die "\n"; -} - -sub cleanup { - `rm -rf grammar.tmp grammar.tmp.sorted`; -} - - - - -- cgit v1.2.3