diff options
author | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 21:03:19 +0000 |
---|---|---|
committer | redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-25 21:03:19 +0000 |
commit | 1a6e8eb7b5f848079f162fb2fca49e81cdabb698 (patch) | |
tree | b94b2e6390ed15cb9a7ff537a90b3184bf44169d /gi/pipeline/scripts | |
parent | 849cf89882c84e519d7ddae710b0dd4916f75ad5 (diff) |
configure number of translations to keep
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@410 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline/scripts')
-rwxr-xr-x | gi/pipeline/scripts/filter-by-f.pl | 7 | ||||
-rwxr-xr-x | gi/pipeline/scripts/refilter.pl | 5 |
2 files changed, 9 insertions, 3 deletions
diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl
index 3dd03bdd..0cef0606 100755
--- a/gi/pipeline/scripts/filter-by-f.pl
+++ b/gi/pipeline/scripts/filter-by-f.pl
@@ -8,13 +8,16 @@ my $REFILTER="$SCRIPT_DIR/refilter.pl";
 my $SORT="$SCRIPT_DIR/sort-by-key.sh";
 assert_exec($REKEY, $REFILTER, $SORT);
 
-die "Usage: $0 ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 2;
+
+die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3;
+my $translations = shift @ARGV;
+die "Need number: $translations" unless $translations > 0;
 die unless $ARGV[0] =~ /\.gz$/;
 die unless $ARGV[1] =~ /\.gz$/;
 die if $ARGV[0] eq $ARGV[1];
 die "Can't find $ARGV[0]" unless -f $ARGV[0];
 
-my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER | gzip > $ARGV[1]";
+my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]";
 safesystem($ARGV[1], $cmd) or die "Filtering failed";
 exit 0;
 
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
index 11a36ebe..a783eb4e 100755
--- a/gi/pipeline/scripts/refilter.pl
+++ b/gi/pipeline/scripts/refilter.pl
@@ -1,7 +1,10 @@
 #!/usr/bin/perl -w
 use strict;
 
-my $NUM_TRANSLATIONS = 30;
+my $NUM_TRANSLATIONS = shift @ARGV;
+unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; }
+print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n";
+
 my $pk = '';
 my %dict;
 while(<>) {