diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2012-12-05 20:27:30 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2012-12-05 20:27:30 -0500 |
commit | a86a37cbe2fb6ffdcf4374f180010a118fed1063 (patch) | |
tree | 21b67592f50e64552616163e04d51965d0e7de9f /corpus/support/tokenizer.pl | |
parent | c81b0dc240f2233c3e5ecccd8982218115476f9a (diff) |
remove logging, you should be using pv
Diffstat (limited to 'corpus/support/tokenizer.pl')
-rwxr-xr-x | corpus/support/tokenizer.pl | 9 |
1 files changed, 0 insertions, 9 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 23be00a5..e9c3a37d 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -107,24 +107,15 @@ my $orig_token_total = 0;
 my $deep_proc_token_total = 0;
 my $new_token_total = 0;
 
-my $line_total = 0;
-my $content_line_total = 0;
-
 while(<STDIN>){
     chomp();
 
-    $line_total ++;
-    if ($line_total % 100000 == 0) { print STDERR " [$line_total]\n"; }
-    elsif ($line_total % 2500 == 0) { print STDERR "."; }
-
     if(/^(\[b\s+|\]b|\]f|\[f\s+)/ || (/^\[[bf]$/) || (/^\s*$/) || /^<DOC/ || /^<\/DOC/) {
         ## markup
         print STDOUT "$_\n";
         next;
     }
 
-    $content_line_total ++;
-
     my $orig_num = 0;
     my $deep_proc_num = 0;
 