From 1a6e8eb7b5f848079f162fb2fca49e81cdabb698 Mon Sep 17 00:00:00 2001 From: redpony Date: Sun, 25 Jul 2010 21:03:19 +0000 Subject: configure number of translations to keep git-svn-id: https://ws10smt.googlecode.com/svn/trunk@410 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pipeline/scripts/filter-by-f.pl | 7 +++++-- gi/pipeline/scripts/refilter.pl | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'gi/pipeline/scripts') diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl index 3dd03bdd..0cef0606 100755 --- a/gi/pipeline/scripts/filter-by-f.pl +++ b/gi/pipeline/scripts/filter-by-f.pl @@ -8,13 +8,16 @@ my $REFILTER="$SCRIPT_DIR/refilter.pl"; my $SORT="$SCRIPT_DIR/sort-by-key.sh"; assert_exec($REKEY, $REFILTER, $SORT); -die "Usage: $0 ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 2; + +die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3; +my $translations = shift @ARGV; +die "Need number: $translations" unless $translations > 0; die unless $ARGV[0] =~ /\.gz$/; die unless $ARGV[1] =~ /\.gz$/; die if $ARGV[0] eq $ARGV[1]; die "Can't find $ARGV[0]" unless -f $ARGV[0]; -my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER | gzip > $ARGV[1]"; +my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]"; safesystem($ARGV[1], $cmd) or die "Filtering failed"; exit 0; diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl index 11a36ebe..a783eb4e 100755 --- a/gi/pipeline/scripts/refilter.pl +++ b/gi/pipeline/scripts/refilter.pl @@ -1,7 +1,10 @@ #!/usr/bin/perl -w use strict; -my $NUM_TRANSLATIONS = 30; +my $NUM_TRANSLATIONS = shift @ARGV; +unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; } +print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n"; + my $pk = ''; my %dict; while(<>) { -- cgit v1.2.3