summary refs log tree commit diff
path: root/corpus/filter-length.pl
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/filter-length.pl')
-rwxr-xr-x corpus/filter-length.pl 4
1 files changed, 2 insertions, 2 deletions
diff --git a/corpus/filter-length.pl b/corpus/filter-length.pl
index 38837f14..2e257cda 100755
--- a/corpus/filter-length.pl
+++ b/corpus/filter-length.pl
@@ -44,11 +44,11 @@ while(<F>) {
$lines++;
if ($lines % 100000 == 0) { print STDERR " [$lines]\n"; }
elsif ($lines % 2500 == 0) { print STDERR "."; }
- my ($sf, $se, @d) = split / \|\|\| /;
+ my ($sf, $se, @d) = split /\s*\|\|\|\s*/;
if (scalar @d != 0 or !defined $se) {
$bad_format++;
if ($bad_format > 100 && ($bad_format / $lines) > 0.02) {
- die "Corpus appears to be incorretly formatted, example: $_";
+ die "$bad_format / $lines : Corpus appears to be incorretly formatted, example: $_";
}
next;
}