summary | refs | log | tree | commit | diff
path: root/gi/pipeline/scripts
diff options
context:
space:
mode:
author redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-25 21:03:19 +0000
committer redpony <redpony@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-25 21:03:19 +0000
commit 7e54f38591ab2aaa0c9a8738425e7f388fec7a1d (patch)
tree 9e97444d03f39d6c53bb78295d30d733678a853f /gi/pipeline/scripts
parent 65a47d21082deb41aceb6516212568408bddaeac (diff)
configure number of translations to keep
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@410 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pipeline/scripts')
-rwxr-xr-x gi/pipeline/scripts/filter-by-f.pl 7
-rwxr-xr-x gi/pipeline/scripts/refilter.pl 5
2 files changed, 9 insertions, 3 deletions
diff --git a/gi/pipeline/scripts/filter-by-f.pl b/gi/pipeline/scripts/filter-by-f.pl
index 3dd03bdd..0cef0606 100755
--- a/gi/pipeline/scripts/filter-by-f.pl
+++ b/gi/pipeline/scripts/filter-by-f.pl
@@ -8,13 +8,16 @@ my $REFILTER="$SCRIPT_DIR/refilter.pl";
my $SORT="$SCRIPT_DIR/sort-by-key.sh";
assert_exec($REKEY, $REFILTER, $SORT);
-die "Usage: $0 ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 2;
+
+die "Usage: $0 NUM-TRANSLATIONS ingrammar.gz outgrammar.gz\n" unless scalar @ARGV == 3;
+my $translations = shift @ARGV;
+die "Need number: $translations" unless $translations > 0;
die unless $ARGV[0] =~ /\.gz$/;
die unless $ARGV[1] =~ /\.gz$/;
die if $ARGV[0] eq $ARGV[1];
die "Can't find $ARGV[0]" unless -f $ARGV[0];
-my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER | gzip > $ARGV[1]";
+my $cmd = "gunzip -c $ARGV[0] | $REKEY | $SORT | $REFILTER $translations | gzip > $ARGV[1]";
safesystem($ARGV[1], $cmd) or die "Filtering failed";
exit 0;
diff --git a/gi/pipeline/scripts/refilter.pl b/gi/pipeline/scripts/refilter.pl
index 11a36ebe..a783eb4e 100755
--- a/gi/pipeline/scripts/refilter.pl
+++ b/gi/pipeline/scripts/refilter.pl
@@ -1,7 +1,10 @@
#!/usr/bin/perl -w
use strict;
-my $NUM_TRANSLATIONS = 30;
+my $NUM_TRANSLATIONS = shift @ARGV;
+unless ($NUM_TRANSLATIONS) { $NUM_TRANSLATIONS=30; }
+print STDERR "KEEPING $NUM_TRANSLATIONS TRANSLATIONS FOR SOURCE\n";
+
my $pk = '';
my %dict;
while(<>) {