summaryrefslogtreecommitdiff
path: root/gi/pipeline/scripts/refilter.pl
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pipeline/scripts/refilter.pl')
-rwxr-xr-xgi/pipeline/scripts/refilter.pl40
1 files changed, 0 insertions, 40 deletions
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
deleted file mode 100755
index a783eb4e..00000000
--- a/gi/pipeline/scripts/refilter.pl
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-
-my $NUM_TRANSLATIONS = shift @ARGV;
-unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; }
-print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n";
-
-my $pk = '';
-my %dict;
-while(<>) {
- s/^(.+)\t//;
- my $key = $1;
- if ($key ne $pk) {
- if ($pk) {
- emit_dict();
- }
- %dict = ();
- $pk = $key;
- }
- my ($lhs, $f, $e, $s) = split / \|\|\| /;
- my $score = 0;
- if ($s =~ /XEF=([^ ]+)/) {
- $score += $1;
- } else { die; }
- if ($s =~ /GenerativeProb=([^ ]+)/) {
- $score += ($1 / 10);
- } else { die; }
- $dict{"$lhs ||| $f ||| $e ||| $s"} = $score;
-}
-emit_dict();
-
-sub emit_dict {
- my $cc = 0;
- for my $k (sort { $dict{$a} <=> $dict{$b} } keys %dict) {
- print "$k";
- $cc++;
- if ($cc >= $NUM_TRANSLATIONS) { last; }
- }
-}
-