summary refs log tree commit diff
path: root/corpus/support/tokenizer.pl
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2012-12-05 20:27:30 -0500
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2012-12-05 20:27:30 -0500
commit a86a37cbe2fb6ffdcf4374f180010a118fed1063 (patch)
tree 21b67592f50e64552616163e04d51965d0e7de9f /corpus/support/tokenizer.pl
parent c81b0dc240f2233c3e5ecccd8982218115476f9a (diff)
remove logging, you should be using pv
Diffstat (limited to 'corpus/support/tokenizer.pl')
-rwxr-xr-x corpus/support/tokenizer.pl 9
1 files changed, 0 insertions, 9 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 23be00a5..e9c3a37d 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -107,24 +107,15 @@ my $orig_token_total = 0;
my $deep_proc_token_total = 0;
my $new_token_total = 0;
-my $line_total = 0;
-my $content_line_total = 0;
-
while(<STDIN>){
chomp();
- $line_total ++;
- if ($line_total % 100000 == 0) { print STDERR " [$line_total]\n"; }
- elsif ($line_total % 2500 == 0) { print STDERR "."; }
-
if(/^(\[b\s+|\]b|\]f|\[f\s+)/ || (/^\[[bf]$/) || (/^\s*$/) || /^<DOC/ || /^<\/DOC/) {
## markup
print STDOUT "$_\n";
next;
}
- $content_line_total ++;
-
my $orig_num = 0;
my $deep_proc_num = 0;