diff options
author | Patrick Simianer <p@simianer.de> | 2014-02-16 00:12:32 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-02-16 00:12:32 +0100 |
commit | 81a637ae52d2a1d0bc751b44c193765cdc1091f1 (patch) | |
tree | 19708fb523ef32cbeccc4d87133f115650e13280 /tf-idf | |
parent | 99ae15932eae7e727b74f723107cf42aad80ba3f (diff) |
nlp_ruby 0.3
Diffstat (limited to 'tf-idf')
-rwxr-xr-x | tf-idf | 12 |
1 file changed, 6 insertions, 6 deletions
```diff
@@ -15,7 +15,7 @@ def main
   stopwords = []
   if cfg[:filter_stopwords]
-    stopwords = ReadFile.new(cfg[:filter_stopwords]).readlines.map{ |i|
+    stopwords = ReadFile.readlines(cfg[:filter_stopwords]).map{ |i|
       i.split('|').first.strip
     }.reject{ |i| i=='' }
   end
@@ -23,17 +23,17 @@ def main
   docs = {}
   cfg[:documents].each { |i|
     if cfg[:one_item_per_line]
-      docs[i] = ReadFile.new(i).readlines_strip
+      docs[i] = ReadFile.readlines_strip i
     else
-      docs[i] = ReadFile.new(i).read.split(/\s/).map{ |i| i.strip }
+      docs[i] = ReadFile.read(i).split(/\s/).map{ |i| i.strip }
     end
   }
-  idf_values = idf docs
+  idf_values = TFIDF::idf docs
   docs.each_pair { |name, words|
-    just_tf = tf words, stopwords
-    just_tf = ntf(just_tf) if cfg[:ntf]
+    just_tf = TFIDF::tf words, stopwords
+    just_tf = TFIDF::ntf(just_tf) if cfg[:ntf]
     tf_idf = {}; tf_idf.default = 0.0
     if cfg[:idf]
       just_tf.each_pair { |word,f|
```