summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2017-08-04 16:24:07 +0200
committer Patrick Simianer <p@simianer.de> 2017-08-04 16:24:07 +0200
commit bfb46f5480411e8021b46b20b081437b49742fd5 (patch)
tree 551efeaf27e49f8f1125b5f39876e47afe86fd49
parent 9d5aaf992e6de183b7e8772a6c228e98b9914193 (diff)
parent 447087fa8ebeee757c56f636aef95b2b628009c5 (diff)
Merge branch 'master' of github.com:pks/nlp_scripts
-rwxr-xr-x filter-illegal 25
1 files changed, 25 insertions, 0 deletions
diff --git a/filter-illegal b/filter-illegal
new file mode 100755
index 0000000..8b29f3e
--- /dev/null
+++ b/filter-illegal
@@ -0,0 +1,25 @@
+#!/usr/bin/env ruby
+# Usage: filter-illegal FILE0 FILE1
+# Reads the two files in parallel and prints the 0-based index of every line
+# pair in which either line contains an illegal token ("[", "]" or "|||").
+# Each offending index is printed exactly once.
+
+require 'zipf'
+
+illegal = ['[', ']', '|||']
+skipi = []
+
+# Block form closes both files even if reading raises.
+File.open(ARGV[0]) do |in0|
+  File.open(ARGV[1]) do |in1|
+    in0.each_line.with_index do |line0, i|
+      # FILE1 may be shorter than FILE0; a missing line has nothing to flag.
+      line1 = in1.gets.to_s
+      # any? stops at the first hit, so an index is never recorded twice.
+      skipi << i if illegal.any? { |k| line0.include?(k) || line1.include?(k) }
+    end
+  end
+end
+
+skipi.each { |j| puts j }
+