diff options
author | Patrick Simianer <p@simianer.de> | 2017-08-04 16:24:07 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2017-08-04 16:24:07 +0200 |
commit | bfb46f5480411e8021b46b20b081437b49742fd5 (patch) | |
tree | 551efeaf27e49f8f1125b5f39876e47afe86fd49 | |
parent | 9d5aaf992e6de183b7e8772a6c228e98b9914193 (diff) | |
parent | 447087fa8ebeee757c56f636aef95b2b628009c5 (diff) |
Merge branch 'master' of github.com:pks/nlp_scripts
-rwxr-xr-x | filter-illegal | 25 |
1 files changed, 25 insertions, 0 deletions
#!/usr/bin/env ruby
# frozen_string_literal: true

# filter-illegal: reads two files in lockstep and prints, one per line, the
# 0-based index of every line pair in which either line contains an illegal
# token. Each index is printed at most once, no matter how many tokens match.
#
# Usage: filter-illegal FILE0 FILE1

begin
  require 'zipf'
rescue LoadError
  # Nothing below calls into zipf, so a missing gem is not fatal.
end

# Substrings whose presence marks a line as illegal.
ILLEGAL_TOKENS = ['[', ']', '|||'].freeze

# Tells whether +line+ contains at least one of +tokens+ as a substring.
#
# @param line [String, nil] the line to inspect; nil never matches
# @param tokens [Array<String>] substrings to look for
# @return [Boolean]
def illegal_line?(line, tokens = ILLEGAL_TOKENS)
  !line.nil? && tokens.any? { |token| line.include?(token) }
end

# Walks +in0+ and +in1+ line by line in lockstep and collects the 0-based
# index of every pair in which either line is illegal.
#
# Iteration is driven by +in0+: it stops when +in0+ is exhausted, and any
# extra lines in +in1+ are ignored. If +in1+ ends first, its missing lines
# are treated as containing no illegal token.
#
# @param in0 [#each_line] first input stream
# @param in1 [#gets] second input stream
# @param tokens [Array<String>] substrings that make a line illegal
# @return [Array<Integer>] matching indices in ascending order, no duplicates
def illegal_line_indices(in0, in1, tokens = ILLEGAL_TOKENS)
  indices = []
  in0.each_line.with_index do |line0, i|
    line1 = in1.gets
    indices << i if illegal_line?(line0, tokens) || illegal_line?(line1, tokens)
  end
  indices
end

if __FILE__ == $PROGRAM_NAME
  if ARGV.size >= 2
    # Block form of File.open closes both handles even if reading raises.
    File.open(ARGV[0]) do |in0|
      File.open(ARGV[1]) do |in1|
        # Iterate explicitly: `puts []` would print a blank line when
        # nothing matched.
        illegal_line_indices(in0, in1).each { |i| puts i }
      end
    end
  else
    warn "usage: #{File.basename($PROGRAM_NAME)} FILE0 FILE1"
    # A bare invocation only shows the usage text; a wrong argument count
    # is reported as an error.
    exit 1 unless ARGV.empty?
  end
end