#!/usr/bin/env ruby
# frozen_string_literal: true

# filter-tokens
#
# Origin: commit 5a53215ed46e12db68cdd321a6e1228956b163e0 ("filter-tokens"),
# Patrick Simianer, Wed, 13 Dec 2017 09:16:16 +0100 (from a cgit v1.2.3 patch).
#
# Usage: filter-tokens BAD_WORDS_FILE < INPUT
#
# Loads the entries returned by ReadFile.readlines_strip(BAD_WORDS_FILE)
# (presumably the stripped lines of that file -- ReadFile comes from the
# `zipf` library, confirm there), then reads STDIN line by line and prints the
# 0-based index of every input line in which at least one
# whitespace-separated token is exactly equal to one of those entries.

require 'zipf'

# Fail early with a usage message instead of handing nil to ReadFile.
abort "usage: #{File.basename($PROGRAM_NAME)} BAD_WORDS_FILE < INPUT" if ARGV.empty?

# Hash used as a set so that membership tests are direct key lookups.
bad_words = {}
ReadFile.readlines_strip(ARGV[0]).each { |word| bad_words[word] = true }

# One hash lookup per token, rather than scanning the token list once per
# bad word; the set of reported line indices is the same.
STDIN.each_line.with_index do |line, index|
  puts index if line.split.any? { |token| bad_words.key?(token) }
end