summary refs log tree commit diff
path: root/corpus/filter-length.pl
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/filter-length.pl')
-rwxr-xr-x corpus/filter-length.pl 6
1 files changed, 4 insertions, 2 deletions
diff --git a/corpus/filter-length.pl b/corpus/filter-length.pl
index 70032ca7..3cfa40cc 100755
--- a/corpus/filter-length.pl
+++ b/corpus/filter-length.pl
@@ -3,8 +3,8 @@ use strict;
use utf8;
##### EDIT THESE SETTINGS ####################################################
-my $MAX_LENGTH = 99; # discard a sentence if it is longer than this
-my $AUTOMATIC_INCLUDE_IF_SHORTER_THAN = 6; # if both are shorter, include
+my $MAX_LENGTH = 150; # discard a sentence if it is longer than this
+my $AUTOMATIC_INCLUDE_IF_SHORTER_THAN = 7; # if both are shorter, include
my $MAX_ZSCORE = 1.8; # how far from the mean can the (log)ratio be?
##############################################################################
@@ -128,6 +128,8 @@ while(<F>) {
next;
}
print;
+ } else {
+ print;
}
$to++;
}