diff options
Diffstat (limited to 'gi/pipeline/scripts')
-rwxr-xr-x  gi/pipeline/scripts/filter-by-f.pl | 7 +++++--
-rwxr-xr-x  gi/pipeline/scripts/refilter.pl    | 5 ++++-
2 files changed, 9 insertions, 3 deletions
diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl
index 3dd03bdd..0cef0606 100755
--- a/gi/pipeline/scripts/filter-by-f.pl
+++ b/gi/pipeline/scripts/filter-by-f.pl
@@ -8,13 +8,16 @@ my $REFILTER="$SCRIPT_DIR/refilter.pl";
 my $SORT="$SCRIPT_DIR/sort-by-key.sh";
 assert_exec($REKEY, $REFILTER, $SORT);
 
-die "Usage: $0 ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 2;
+
+die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3;
+my $translations = shift @ARGV;
+die "Need number: $translations" unless $translations > 0;
 die unless $ARGV[0] =~ /\.gz$/;
 die unless $ARGV[1] =~ /\.gz$/;
 die if $ARGV[0] eq $ARGV[1];
 die "Can't find $ARGV[0]" unless -f $ARGV[0];
 
-my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER | gzip > $ARGV[1]";
+my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]";
 safesystem($ARGV[1], $cmd) or die "Filtering failed";
 exit 0;
 
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
index 11a36ebe..a783eb4e 100755
--- a/gi/pipeline/scripts/refilter.pl
+++ b/gi/pipeline/scripts/refilter.pl
@@ -1,7 +1,10 @@
 #!/usr/bin/perl -w
 use strict;
 
-my $NUM_TRANSLATIONS = 30;
+my $NUM_TRANSLATIONS = shift @ARGV;
+unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; }
+print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n";
+
 my $pk = '';
 my %dict;
 while(<>) {