diff options
Diffstat (limited to 'gi/pipeline/scripts')
| -rwxr-xr-x | gi/pipeline/scripts/filter-by-f.pl | 7 |
| -rwxr-xr-x | gi/pipeline/scripts/refilter.pl | 5 |
2 files changed, 9 insertions, 3 deletions
diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl
index 3dd03bdd..0cef0606 100755
--- a/gi/pipeline/scripts/filter-by-f.pl
+++ b/gi/pipeline/scripts/filter-by-f.pl
@@ -8,13 +8,16 @@ my $REFILTER="$SCRIPT_DIR/refilter.pl";
 my $SORT="$SCRIPT_DIR/sort-by-key.sh";
 assert_exec($REKEY, $REFILTER, $SORT);
-die "Usage: $0 ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 2;
+
+die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3;
+my $translations = shift @ARGV;
+die "Need number: $translations" unless $translations > 0;
 die unless $ARGV[0] =~ /\.gz$/;
 die unless $ARGV[1] =~ /\.gz$/;
 die if $ARGV[0] eq $ARGV[1];
 die "Can't find $ARGV[0]" unless -f $ARGV[0];
-my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER | gzip > $ARGV[1]";
+my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]";
 safesystem($ARGV[1], $cmd) or die "Filtering failed";
 exit 0;
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
index 11a36ebe..a783eb4e 100755
--- a/gi/pipeline/scripts/refilter.pl
+++ b/gi/pipeline/scripts/refilter.pl
@@ -1,7 +1,10 @@
 #!/usr/bin/perl -w
 use strict;
-my $NUM_TRANSLATIONS = 30;
+my $NUM_TRANSLATIONS = shift @ARGV;
+unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; }
+print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n";
+
 my $pk = '';
 my %dict;
 while(<>) {
