diff options
author | Patrick Simianer <p@simianer.de> | 2017-12-13 09:16:16 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-12-13 09:16:16 +0100 |
commit | 5a53215ed46e12db68cdd321a6e1228956b163e0 (patch) | |
tree | e37f59192de0bc5f1f56f09e3520be7a153e1a3f /filter-tokens | |
parent | a6b0615d945313dd1c058eb97cc02c460853411e (diff) |
filter-tokens
Diffstat (limited to 'filter-tokens')
-rwxr-xr-x | filter-tokens | 23 |
1 files changed, 23 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# filter-tokens (added as a new file, mode 100755, blob 00c8f2c)
#
# Prints the 0-based index of every line read from STDIN that contains at
# least one "bad" token. Bad tokens are listed one per line in the file
# named by the first command-line argument. A line matches only when one of
# its whitespace-separated tokens is exactly equal to a bad word.
#
# Usage: filter-tokens BAD_WORDS_FILE < INPUT

require 'zipf'

abort 'usage: filter-tokens BAD_WORDS_FILE < INPUT' if ARGV.empty?

# Hash used as a set so each token can be checked with a single lookup.
# NOTE(review): ReadFile comes from the zipf gem; readlines_strip presumably
# returns the file's lines with surrounding whitespace removed -- confirm.
bad_words = {}
ReadFile.readlines_strip(ARGV[0]).each { |word| bad_words[word] = true }

# Look up each token in the hash instead of scanning every bad word per
# line; the set of reported indices is the same.
$stdin.each_line.with_index do |line, index|
  puts index if line.split.any? { |token| bad_words.key?(token) }
end