summary | refs | log | tree | commit | diff
path: root/corpus/support/tokenizer.pl
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/support/tokenizer.pl')
-rwxr-xr-x corpus/support/tokenizer.pl 9
1 files changed, 0 insertions, 9 deletions
diff --git a/corpus/support/tokenizer.pl b/corpus/support/tokenizer.pl
index 23be00a5..e9c3a37d 100755
--- a/corpus/support/tokenizer.pl
+++ b/corpus/support/tokenizer.pl
@@ -107,24 +107,15 @@ my $orig_token_total = 0;
my $deep_proc_token_total = 0;
my $new_token_total = 0;
-my $line_total = 0;
-my $content_line_total = 0;
-
while(<STDIN>){
chomp();
- $line_total ++;
- if ($line_total % 100000 == 0) { print STDERR " [$line_total]\n"; }
- elsif ($line_total % 2500 == 0) { print STDERR "."; }
-
if(/^(\[b\s+|\]b|\]f|\[f\s+)/ || (/^\[[bf]$/) || (/^\s*$/) || /^<DOC/ || /^<\/DOC/) {
## markup
print STDOUT "$_\n";
next;
}
- $content_line_total ++;
-
my $orig_num = 0;
my $deep_proc_num = 0;