summary refs log tree commit diff
path: root/word-aligner/extract_vocab.pl
diff options
context:
space:
mode:
Diffstat (limited to 'word-aligner/extract_vocab.pl')
-rwxr-xr-x word-aligner/extract_vocab.pl 20
1 files changed, 0 insertions, 20 deletions
diff --git a/word-aligner/extract_vocab.pl b/word-aligner/extract_vocab.pl
deleted file mode 100755
index 070d4202..00000000
--- a/word-aligner/extract_vocab.pl
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/perl -w
-use strict;
-# Reads lines from the files named on the command line (or STDIN), counts whitespace-separated words, and prints each distinct word on its own line, most frequent first.
-print STDERR "Extracting vocabulary...\n";
-my %dict = ();  # word => number of occurrences
-my $wc = 0;     # running total of tokens seen
-while(<>) {
- chomp;
- my @words = split /\s+/;  # NOTE(review): leading whitespace before a word yields an empty first field, which is then counted as a word
- for my $word (@words) { $wc++; $dict{$word}++; }
-}
-
-my $tc = 0;     # number of distinct words printed
-for my $word (sort {$dict{$b} <=> $dict{$a}} keys %dict) {  # descending by count; order among equal counts is unspecified
- print "$word\n";
- $tc++;
-}
-
-print STDERR "$tc types / $wc tokens\n";  # summary goes to STDERR, so STDOUT carries only the word list
-