summaryrefslogtreecommitdiff
path: root/gi/pf/make-freq-bins.pl
blob: fdcd355557153e98cfbb241328a067bcbab39c17 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/perl -w
use strict;

# Parameters of the frequency-bin formula used at the end of this script:
#   $BASE   - base of the logarithm applied to a word's relative frequency.
#   $CUTOFF - offset added to that logarithm before it is truncated.
my $BASE = 6;
my $CUTOFF = 3;

# %d maps each distinct token to its number of occurrences;
# $num is the total number of tokens read.
my %d;
my $num = 0;

# Read every input line (files named on the command line, or STDIN).
while (<>) {
  chomp;
  # split ' ' is the awk-style split: it skips leading whitespace, so an
  # indented line does not produce an empty first field.  With /\s+/ that
  # empty string was counted as a word, inflating $num and adding a bogus
  # entry to the vocabulary.
  for my $w (split ' ') {
    $d{$w}++;
    $num++;
  }
}

# Vocabulary ordered from most to least frequent.  Words with equal counts
# are ordered lexically so the output is reproducible from run to run
# (sorting on the count alone leaves ties in hash order).
my @vocab = sort { $d{$b} <=> $d{$a} or $a cmp $b } keys %d;

# Print one "word bin" line per distinct word.
for my $word (@vocab) {
  my $count = $d{$word};

  # Bin = -int(log_BASE(count / total) + CUTOFF), clamped at 0.
  # int() truncates toward zero, so every word whose relative frequency is
  # above BASE**-(CUTOFF+1) lands in bin 0, and bin k >= 1 holds relative
  # frequencies in (BASE**-(CUTOFF+k+1), BASE**-(CUTOFF+k)], up to
  # floating-point rounding at the boundaries.
  my $nl = -int(log($count / $num) / log($BASE) + $CUTOFF);
  $nl = 0 if $nl < 0;
  print "$word $nl\n";
}