summary refs log tree commit diff
path: root/lib/nlp_ruby/tfidf.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/nlp_ruby/tfidf.rb')
-rw-r--r-- lib/nlp_ruby/tfidf.rb 12
1 files changed, 9 insertions, 3 deletions
diff --git a/lib/nlp_ruby/tfidf.rb b/lib/nlp_ruby/tfidf.rb
index 84d55a5..13a40a3 100644
--- a/lib/nlp_ruby/tfidf.rb
+++ b/lib/nlp_ruby/tfidf.rb
@@ -1,6 +1,9 @@
+module TFIDF
+
+
# returns key='raw frequency' for an
# array-like object
-def tf array, stopwords=[]
+def TFIDF::tf array, stopwords=[]
v = {}; v.default = 0
array.uniq.each { |i|
next if stopwords.include? i
@@ -11,7 +14,7 @@ end
# smoothes raw frequencies of tf() in-place
# a is a smoothing term
-def ntf hash, a=0.4
+def TFIDF::ntf hash, a=0.4
max = hash.values.max.to_f
hash.each_pair { |k,v|
hash[k] = a + (1-a)*(v/max)
@@ -19,7 +22,7 @@ def ntf hash, a=0.4
end
# returns idf value for each word in a vocabulary
-def idf list_of_hashes
+def TFIDF::idf list_of_hashes
vocab = list_of_hashes.values.flatten.uniq
n = list_of_hashes.size.to_f
idf = {}
@@ -30,3 +33,6 @@ def idf list_of_hashes
return idf
end
+
+end #module
+