#!/usr/bin/env ruby
#
# stats.rb -- print, for every reference line, the mean number of occurrences
# per unique type.
#
# Provenance (taken from the patch envelope this script was delivered in):
#   commit:  8adea055298189643a3c7a76e2d529f536a94e11
#   author:  Patrick Simianer
#   date:    Tue, 23 Apr 2019 13:55:47 +0200
#   subject: init
#   change:  new file stats.rb (mode 100755, index 0000000..390cbd9), cgit v1.2.3
#
# NOTE(review): ReadFile, read_stopwords_file, get_types and (presumably)
# Trollop are expected to be provided by 'util'; none of them is defined in
# this file -- confirm against util.rb.

require_relative 'util'

# Loads the two input files needed by #stats.
#
# @param reference_file [String] path handed to ReadFile.readlines_strip
# @param stopwords_file [String] path handed to read_stopwords_file
# @return [Array] two-element array: [references, stopwords], i.e. whatever
#   the two helpers return (presumably an enumerable of lines and a stopword
#   collection -- verify in util)
def setup(reference_file, stopwords_file)
  references = ReadFile.readlines_strip(reference_file)
  stopwords = read_stopwords_file(stopwords_file)

  [references, stopwords]
end

# Mean of `types.count(t)` taken over every `t` in `uniq_types`.
#
# @param types [Array] all type occurrences of one reference
# @param uniq_types [Enumerable] the types to count within `types`
# @return [Float, nil] the mean count, or nil when `uniq_types` yields nothing
def mean_type_count(types, uniq_types)
  counts = uniq_types.map { |type| types.count(type) }
  return nil if counts.empty?

  # Convert the divisor to Float so the division is not truncated.
  counts.inject(:+) / counts.size.to_f
end

# Prints one line per reference: the mean occurrence count of its unique types.
# References for which get_types yields no unique types produce no output.
#
# @param references [Enumerable] references; each element is passed to get_types
# @param stopwords [Object] passed through to get_types unchanged
# @return [Object] the result of `references.each` (the receiver for an Array)
def stats(references, stopwords)
  references.each do |reference|
    # get_types is expected to return a pair: (all types, unique types).
    types, uniq_types = get_types(reference, stopwords)

    mean = mean_type_count(types, uniq_types)
    puts mean unless mean.nil?
  end
end

# Command-line entry point: parses -r/--references and -s/--stopwords (both
# required), loads the files and prints the statistics.
def main
  config = Trollop::options do
    opt :references, "File with references, truecased and tokenized", type: :string, short: "-r", required: true
    opt :stopwords, "File with stopwords, one per line", type: :string, short: "-s", required: true
  end

  references, stopwords = setup(config[:references], config[:stopwords])

  stats(references, stopwords)
end

# Run only when executed as a script, so the file can be required (e.g. to
# reuse the helpers) without triggering option parsing.
main if __FILE__ == $PROGRAM_NAME