summary | refs | log | tree | commit | diff
path: root/stats.rb
diff options
context:
space:
mode:
Diffstat (limited to 'stats.rb')
-rwxr-xr-x stats.rb 38
1 files changed, 38 insertions, 0 deletions
diff --git a/stats.rb b/stats.rb
new file mode 100755
index 0000000..390cbd9
--- /dev/null
+++ b/stats.rb
@@ -0,0 +1,38 @@
+#!/usr/bin/env ruby
+
+require_relative 'util'
+
+# Loads both inputs (references first) and returns [references, stopwords].
+def setup reference_file, stopwords_file
+  reference_lines = ReadFile.readlines_strip(reference_file)
+  stopword_list = read_stopwords_file(stopwords_file)
+  [reference_lines, stopword_list]
+end
+
+# For each reference, prints the mean of how often each entry of uniq_types
+# occurs in types (one float per line). Both lists come from get_types, which
+# is defined outside this file. Nothing is printed when uniq_types is empty.
+def stats references, stopwords
+  references.each do |ref|
+    types, uniq_types = get_types ref, stopwords
+    occurrences = uniq_types.map { |type| types.count(type) }
+    next if occurrences.empty?
+
+    # to_f on the divisor forces floating-point division
+    puts occurrences.inject(:+) / occurrences.size.to_f
+  end
+end
+
+# Command-line entry point: parses -r/-s with Trollop, loads the two input
+# files through setup and passes the resulting pair on to stats.
+def main
+  opts = Trollop::options do
+    opt :references, "File with references, truecased and tokenized", type: :string, short: "-r", required: true
+    opt :stopwords, "File with stopwords, one per line", type: :string, short: "-s", required: true
+  end
+  stats(*setup(opts[:references], opts[:stopwords]))
+end
+
+# Runs unconditionally whenever this file is evaluated.
+main
+