summaryrefslogtreecommitdiff
path: root/gi/pipeline/scripts/refilter.pl
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline/scripts/refilter.pl')
-rwxr-xr-xgi/pipeline/scripts/refilter.pl37
1 files changed, 37 insertions, 0 deletions
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
new file mode 100755
index 00000000..11a36ebe
--- /dev/null
+++ b/gi/pipeline/scripts/refilter.pl
@@ -0,0 +1,37 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Maximum number of entries emit_dict() prints for each key.
+my $NUM_TRANSLATIONS = 30;
+# Key of the group currently being accumulated ('' until the first line is read).
+my $pk = '';
+# Entries collected for the current key: "lhs ||| f ||| e ||| features" => score.
+my %dict;
+
+# Read lines of the form "KEY<TAB>lhs ||| f ||| e ||| features" from the files
+# named on the command line (or STDIN).  Entries are accumulated per key and
+# flushed through emit_dict() every time the key changes, and once more at end
+# of input.
+# NOTE(review): flushing on key change only works if all lines sharing a key
+# are adjacent in the input -- presumably the input is sorted by key; confirm.
+while (<>) {
+  # Strip the key prefix.  ".+" is greedy, so the key is everything before the
+  # LAST tab on the line.  Fail loudly rather than reuse a stale $1 when the
+  # line has no tab at all.
+  s/^(.+)\t// or die "no tab-separated key found";
+  my $key = $1;
+  if ($key ne $pk) {
+    # Compare against '' explicitly: a plain truth test would skip the flush
+    # (and so silently drop the whole group) for a key of "0".
+    emit_dict() if $pk ne '';
+    %dict = ();
+    $pk = $key;
+  }
+  my ($lhs, $f, $e, $s) = split / \|\|\| /;
+  defined $s or die "expected four ' ||| '-separated fields";
+  # score = XEF + GenerativeProb / 10; both features are mandatory.
+  my $score = 0;
+  if ($s =~ /XEF=([^ ]+)/) {
+    $score += $1;
+  } else {
+    die "no XEF= feature found";
+  }
+  if ($s =~ /GenerativeProb=([^ ]+)/) {
+    $score += ($1 / 10);
+  } else {
+    die "no GenerativeProb= feature found";
+  }
+  # $s still carries the line's trailing newline, so each stored entry is a
+  # complete output line.
+  $dict{"$lhs ||| $f ||| $e ||| $s"} = $score;
+}
+# Flush the final group (prints nothing when the input was empty).
+emit_dict();
+
+# Print the entries currently held in the file-level %dict, ordered by
+# ascending numeric score, stopping after $NUM_TRANSLATIONS entries.
+# Takes no arguments and does not modify %dict.
+# NOTE(review): ascending order means the lowest-scoring entries are the ones
+# kept -- presumably the scores are costs where lower is better; confirm.
+sub emit_dict {
+  my $cc = 0;
+  # Break score ties on the entry text so the output (and which entries make
+  # the cutoff) does not depend on Perl's randomized hash ordering.
+  my @ranked = sort { $dict{$a} <=> $dict{$b} or $a cmp $b } keys %dict;
+  for my $k (@ranked) {
+    # Entries are printed as stored; no separator or newline is added here.
+    print $k;
+    $cc++;
+    last if $cc >= $NUM_TRANSLATIONS;
+  }
+}
+