diff options
Diffstat (limited to 'filter-tokens')
-rwxr-xr-x | filter-tokens | 23 |
1 files changed, 23 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# Prints the zero-based index of every line read from STDIN that contains at
# least one "bad" word.
#
# Usage: filter-tokens BAD_WORDS_FILE < input
#
# The bad words are the entries returned by ReadFile.readlines_strip for the
# file named by the first command-line argument. A line matches when one of
# its whitespace-separated tokens is exactly equal to one of those entries;
# substrings of a token do not count.

require 'zipf'

abort "usage: #{File.basename($PROGRAM_NAME)} BAD_WORDS_FILE < input" if ARGV.empty?

# Store the words as hash keys so each token is checked with a single lookup
# instead of scanning the whole word list for every input line.
bad_words = {}
ReadFile.readlines_strip(ARGV[0]).each { |word| bad_words[word] = true }

STDIN.each_line.with_index do |line, i|
  puts i if line.split.any? { |token| bad_words.key?(token) }
end