summaryrefslogtreecommitdiff
path: root/gi/pipeline/scripts/refilter.pl
blob: 11a36ebeec9263d35d48111f7764d8e5269ade62 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/perl -w
use strict;

# Upper bound on the number of rules printed for each key group.
my $NUM_TRANSLATIONS = 30;
# Key of the group currently being accumulated ('' until the first line).
my $pk = '';
# Rules of the current group: "lhs ||| f ||| e ||| features" => score.
my %dict;

# Each input line is expected to look like
#   KEY <tab> LHS ||| F ||| E ||| FEATURES
# Lines sharing a key must be adjacent: a group is flushed as soon as the
# key changes, and emit_dict() prints its lowest-scoring rules.
while(<>) {
  # Strip the key prefix.  The match is greedy, so the key extends to the
  # last tab on the line.  Fail loudly instead of reusing a stale $1 when
  # the line has no tab at all.
  s/^(.+)\t// or die "input line $.: no tab-separated key found\n";
  my $key = $1;
  if ($key ne $pk) {
    # Compare with '' instead of testing truthiness: a key of "0" is false
    # in boolean context, and its group would otherwise be dropped.
    if ($pk ne '') {
      emit_dict();
    }
    %dict = ();
    $pk = $key;
  }
  # $s keeps the line terminator; emit_dict() relies on it when printing.
  my ($lhs, $f, $e, $s) = split / \|\|\| /;
  defined $s
    or die "input line $.: expected 4 fields separated by ' ||| '\n";
  # Score = XEF + GenerativeProb / 10; both features are mandatory.
  my $score = 0;
  if ($s =~ /XEF=([^ ]+)/) {
    $score += $1;
  } else {
    die "input line $.: feature XEF is missing\n";
  }
  if ($s =~ /GenerativeProb=([^ ]+)/) {
    $score += ($1 / 10);
  } else {
    die "input line $.: feature GenerativeProb is missing\n";
  }
  $dict{"$lhs ||| $f ||| $e ||| $s"} = $score;
}
# Flush the final group (prints nothing when the input was empty).
emit_dict();

# Print the rules of the current group (the file-level %dict) in ascending
# score order, stopping once $NUM_TRANSLATIONS rules have been written.
# Takes no arguments; writes to the currently selected output handle.
sub emit_dict {
  # Ties are broken on the rule text: the order of keys %dict can differ
  # from run to run, so without a secondary key both the output order and
  # the set of rules surviving the cut-off would be unreproducible.
  my @ranked = sort { $dict{$a} <=> $dict{$b} or $a cmp $b } keys %dict;
  my $emitted = 0;
  for my $rule (@ranked) {
    # Each rule string still carries its line terminator from the input.
    print $rule;
    last if ++$emitted >= $NUM_TRANSLATIONS;
  }
  return;
}