diff options
Diffstat (limited to 'gi/pipeline/scripts/refilter.pl')
-rwxr-xr-x | gi/pipeline/scripts/refilter.pl | 37 |
1 files changed, 37 insertions, 0 deletions
#!/usr/bin/perl -w
#
# refilter.pl - keep only the lowest-scoring rules for each key.
#
# Input (files named on the command line, or STDIN) is a sequence of lines
#
#     KEY<TAB>LHS ||| F ||| E ||| FEATURES
#
# where KEY is everything up to the LAST tab on the line and FEATURES must
# contain both "XEF=<number>" and "GenerativeProb=<number>".  Lines sharing a
# key must be adjacent: a group ends as soon as a different key is read.
#
# For every group, the rules are ranked by
#
#     score = XEF + GenerativeProb / 10
#
# in ascending order and at most $NUM_TRANSLATIONS of them are printed to
# STDOUT (without the KEY<TAB> prefix).
# NOTE(review): ascending order keeps the *smallest* scores; presumably the
# features are costs (lower is better) -- confirm against the producer.
use strict;
use warnings;

# Maximum number of rules emitted per key.
my $NUM_TRANSLATIONS = 30;

# Rules of the group currently being read: "LHS ||| F ||| E ||| FEATURES"
# => score.  File-scoped because emit_dict() takes no arguments.
my %dict;

# Run as a script, but stay loadable (e.g. via require) without consuming
# input, so that the helpers below can be exercised on their own.
main() unless caller;

# Read the input group by group and emit the filtered rules of each group.
sub main {
    my $prev_key;    # undef until the first line has been read
    %dict = ();

    while (my $line = <>) {
        chomp $line;

        # Strip the "KEY<TAB>" prefix.  A failed substitution must be caught
        # here: otherwise $1 would be stale and the rule would be filed under
        # the wrong key.  (No trailing newline in the messages, so Perl
        # appends the input line number.)
        $line =~ s/^(.+)\t//
            or die "expected KEY<TAB>RULE but found no tab in '$line'";
        my $key = $1;

        # Compare with defined()/ne rather than testing the key for truth, so
        # that a group whose key is the string "0" is emitted as well.
        if (defined $prev_key && $key ne $prev_key) {
            emit_dict();
            %dict = ();
        }
        $prev_key = $key;

        # Fields beyond the fourth are ignored.
        my ($lhs, $f, $e, $s) = split / \|\|\| /, $line;
        defined $s
            or die "expected 'LHS ||| F ||| E ||| FEATURES' but got '$line'";

        $dict{"$lhs ||| $f ||| $e ||| $s"} = rule_score($s);
    }

    # Flush the final group (a no-op when the input was empty).
    emit_dict();
    return;
}

# Compute the ranking score of a rule from its feature string.
#
#   $features - text containing "XEF=<number>" and "GenerativeProb=<number>";
#               each value extends up to the next space.
#
# Returns XEF + GenerativeProb / 10.  Dies if either feature is missing.
sub rule_score {
    my ($features) = @_;

    my ($xef) = $features =~ /XEF=([^ ]+)/
        or die "missing XEF feature in '$features'";
    my ($generative) = $features =~ /GenerativeProb=([^ ]+)/
        or die "missing GenerativeProb feature in '$features'";

    return $xef + $generative / 10;
}

# Select the entries with the smallest scores.
#
#   $score_of - hash reference mapping entry => numeric score
#   $limit    - maximum number of entries to return
#
# Returns the selected entries as a list, ascending by score.  Equal scores
# are ordered by the entry text so that the result does not depend on hash
# iteration order.
sub top_entries {
    my ($score_of, $limit) = @_;

    my @ranked = sort { $score_of->{$a} <=> $score_of->{$b} or $a cmp $b }
                 keys %{$score_of};
    splice @ranked, $limit if @ranked > $limit;
    return @ranked;
}

# Print at most $NUM_TRANSLATIONS rules of the current group (%dict), one per
# line, lowest score first.
sub emit_dict {
    print "$_\n" for top_entries(\%dict, $NUM_TRANSLATIONS);
    return;
}

1;    # true value so the file can also be loaded with require