From dfbc278c1057555fda9312291c8024049e00b7d8 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 10 Mar 2012 16:42:12 -0500 Subject: frequency-based binning --- gi/pf/make-freq-bins.pl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 gi/pf/make-freq-bins.pl (limited to 'gi/pf/make-freq-bins.pl') diff --git a/gi/pf/make-freq-bins.pl b/gi/pf/make-freq-bins.pl new file mode 100755 index 00000000..fdcd3555 --- /dev/null +++ b/gi/pf/make-freq-bins.pl @@ -0,0 +1,26 @@ +#!/usr/bin/perl -w +use strict; + +my $BASE = 6; +my $CUTOFF = 3; + +my %d; +my $num = 0; +while(<>){ + chomp; + my @words = split /\s+/; + for my $w (@words) {$d{$w}++; $num++;} +} + +my @vocab = sort {$d{$b} <=> $d{$a}} keys %d; + +for (my $i=0; $i