From 81a637ae52d2a1d0bc751b44c193765cdc1091f1 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 16 Feb 2014 00:12:32 +0100 Subject: nlp_ruby 0.3 --- tf-idf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tf-idf') diff --git a/tf-idf b/tf-idf index e1502b3..ce3400a 100755 --- a/tf-idf +++ b/tf-idf @@ -15,7 +15,7 @@ def main stopwords = [] if cfg[:filter_stopwords] - stopwords = ReadFile.new(cfg[:filter_stopwords]).readlines.map{ |i| + stopwords = ReadFile.readlines(cfg[:filter_stopwords]).map{ |i| i.split('|').first.strip }.reject{ |i| i=='' } end @@ -23,17 +23,17 @@ def main docs = {} cfg[:documents].each { |i| if cfg[:one_item_per_line] - docs[i] = ReadFile.new(i).readlines_strip + docs[i] = ReadFile.readlines_strip i else - docs[i] = ReadFile.new(i).read.split(/\s/).map{ |i| i.strip } + docs[i] = ReadFile.read(i).split(/\s/).map{ |i| i.strip } end } - idf_values = idf docs + idf_values = TFIDF::idf docs docs.each_pair { |name, words| - just_tf = tf words, stopwords - just_tf = ntf(just_tf) if cfg[:ntf] + just_tf = TFIDF::tf words, stopwords + just_tf = TFIDF::ntf(just_tf) if cfg[:ntf] tf_idf = {}; tf_idf.default = 0.0 if cfg[:idf] just_tf.each_pair { |word,f| -- cgit v1.2.3