summary refs log tree commit diff
path: root/stats.rb
blob: 390cbd93d706fc4b769606a190a1f8a4d029c066 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env ruby

require_relative 'util'

# Loads the two inputs the script works on.
#
# reference_file is handed to ReadFile.readlines_strip and stopwords_file to
# read_stopwords_file; both helpers are defined outside this file (presumably
# in the required 'util' -- confirm), so the exact shape of what they return
# is not visible here.
#
# Returns a two-element array [references, stopwords], suitable for
# destructuring at the call site.
def setup(reference_file, stopwords_file)
  [
    ReadFile.readlines_strip(reference_file),
    read_stopwords_file(stopwords_file)
  ]
end

# Prints, for every reference, the mean number of occurrences per unique type.
#
# For each reference, get_types (defined outside this file) is called with the
# reference and the stopwords and is expected to return a pair
# [types, uniq_types]. The occurrences of every entry of uniq_types within
# types are counted, and the average of those counts is written to stdout as
# a float, one line per reference. References that yield no unique types
# print nothing.
#
# references - enumerable of references; each element is passed to get_types.
# stopwords  - passed through unchanged to get_types.
#
# Returns whatever references.each returns (the receiver for an Array).
def stats(references, stopwords)
  references.each do |reference|
    types, uniq_types = get_types(reference, stopwords)

    # Build the frequency table in one pass instead of rescanning `types`
    # once per unique type. Hash lookup uses eql?/hash where Array#count uses
    # ==; these agree for strings -- assumes types are strings, TODO confirm.
    frequency = Hash.new(0)
    types.each { |type| frequency[type] += 1 }

    # Reading a missing key yields the default 0 without inserting it, which
    # matches Array#count for a type that never occurs.
    counts = uniq_types.map { |type| frequency[type] }
    next if counts.empty?

    puts counts.inject(:+) / counts.size.to_f
  end
end

# Command-line entry point.
#
# Declares two required string options through Trollop (--references / -r and
# --stopwords / -s), hands their values to setup, and passes the resulting
# references and stopwords on to stats. The return value is that of stats.
def main
  cli = Trollop::options do
    opt :references, "File with references, truecased and tokenized", type: :string, short: "-r", required: true
    opt :stopwords, "File with stopwords, one per line", type: :string, short: "-s", required: true
  end

  references, stopwords = setup(cli[:references], cli[:stopwords])
  stats(references, stopwords)
end

# Run the script: main is invoked unconditionally when this file is evaluated.
main