summaryrefslogtreecommitdiff
path: root/corpus/filter-length.pl
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-01-21 16:53:05 -0500
committer: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-01-21 16:53:05 -0500
commit: e1f71a6ce868d116f04082b697a8d61afcd625f1 (patch)
tree: 0b026f8f44b5193ff6d9561990e6eada19f7e863 /corpus/filter-length.pl
parent: 7cac43b858f3b681555bf0578f54b1f822c43207 (diff)
a little bit of cleanup
Diffstat (limited to 'corpus/filter-length.pl')
-rwxr-xr-x corpus/filter-length.pl | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/corpus/filter-length.pl b/corpus/filter-length.pl
index 38837f14..2e257cda 100755
--- a/corpus/filter-length.pl
+++ b/corpus/filter-length.pl
@@ -44,11 +44,11 @@ while(<F>) {
$lines++;
if ($lines % 100000 == 0) { print STDERR " [$lines]\n"; }
elsif ($lines % 2500 == 0) { print STDERR "."; }
- my ($sf, $se, @d) = split / \|\|\| /;
+ my ($sf, $se, @d) = split /\s*\|\|\|\s*/;
if (scalar @d != 0 or !defined $se) {
$bad_format++;
if ($bad_format > 100 && ($bad_format / $lines) > 0.02) {
- die "Corpus appears to be incorretly formatted, example: $_";
+ die "$bad_format / $lines : Corpus appears to be incorretly formatted, example: $_";
}
next;
}