summary refs log tree commit diff
path: root/gi/pf/make-freq-bins.pl
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
committer Patrick Simianer <p@simianer.de> 2012-03-13 09:24:47 +0100
commit c3a9ea64251605532c7954959662643a6a927bb7 (patch)
tree fed6048a5acdaf3834740107771c2bc48f26fd4d /gi/pf/make-freq-bins.pl
parent 867bca3e5fa0cdd63bf032e5859fb5092d9a4ca1 (diff)
parent a45af4a3704531a8382cd231f6445b3a33b598a3 (diff)
merge with upstream
Diffstat (limited to 'gi/pf/make-freq-bins.pl')
-rwxr-xr-x gi/pf/make-freq-bins.pl 26
1 files changed, 26 insertions, 0 deletions
diff --git a/gi/pf/make-freq-bins.pl b/gi/pf/make-freq-bins.pl
new file mode 100755
index 00000000..fdcd3555
--- /dev/null
+++ b/gi/pf/make-freq-bins.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl -w
+# Assign every word of a whitespace-tokenized text to a logarithmic
+# frequency bin.
+#
+# Input:  text read via <> (files named on the command line, else STDIN).
+# Output: one "<word> <bin>" line per distinct word on STDOUT, most frequent
+#         word first; equally frequent words are ordered alphabetically so
+#         that repeated runs produce identical output.
+#
+# bin = max(0, -int(log_BASE(count / total_tokens) + CUTOFF))
+use strict;
+
+my $BASE = 6;      # base of the logarithm applied to the relative frequency
+my $CUTOFF = 3;    # offset added to the logarithm before truncation
+
+my %d;             # word => number of occurrences
+my $num = 0;       # total number of tokens read
+while (<>) {
+    chomp;
+    # split ' ' discards leading whitespace, whereas split /\s+/ would return
+    # an empty first field for an indented line and count "" as a word.
+    for my $w (split ' ') {
+        $d{$w}++;
+        $num++;
+    }
+}
+
+# Descending by count; the string comparison only decides ties.
+my @vocab = sort { $d{$b} <=> $d{$a} or $a cmp $b } keys %d;
+
+for my $word (@vocab) {
+    # count <= total, so the logarithm is never positive. int() truncates
+    # toward zero, hence every word with log + CUTOFF > -1 falls into bin 0
+    # (a positive sum is negated and then clamped below).
+    my $nl = -int(log($d{$word} / $num) / log($BASE) + $CUTOFF);
+    $nl = 0 if $nl < 0;
+    print "$word $nl\n";
+}
+
+