summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xadd-index12
-rwxr-xr-xexclude17
-rwxr-xr-xjoint-set30
3 files changed, 59 insertions, 0 deletions
diff --git a/add-index b/add-index
new file mode 100755
index 0000000..77a7e8d
--- /dev/null
+++ b/add-index
@@ -0,0 +1,12 @@
+#!/usr/bin/env ruby
+
+# Prefix every line read from STDIN with a running index and a tab.
+# Counting starts at ARGV[0] converted with to_i, or at 0 when no
+# argument is given (nil.to_i is 0, so no explicit default is needed).
+
+start = ARGV.first.to_i
+
+STDIN.each_line.with_index(start) do |line, index|
+  puts "#{index}\t#{line}"
+end
+
diff --git a/exclude b/exclude
new file mode 100755
index 0000000..b5fe3cb
--- /dev/null
+++ b/exclude
@@ -0,0 +1,17 @@
+#!/usr/bin/env ruby
+
+require 'zipf'
+require 'set'
+
+# Print each STDIN line that is absent from the file named by ARGV[0].
+# Lines are compared chomped, so a missing final newline cannot hide a match.
+to_exclude = Set.new
+f = ReadFile.new ARGV[0]
+while line = f.gets
+  to_exclude << line.chomp
+end
+
+STDIN.each_line do |line|
+  puts line unless to_exclude.include? line.chomp
+end
+
diff --git a/joint-set b/joint-set
new file mode 100755
index 0000000..b9b9b22
--- /dev/null
+++ b/joint-set
@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+
+require 'set'
+require 'zipf'
+
+# Print the lines that occur in every input file.
+# Usage: joint-set FILE... N
+# The last argument is removed from the file list before any file is read.
+# NOTE(review): it was converted to an integer but never used; confirm
+# what it was meant to control.
+ARGV.pop
+
+# One set of terminator-stripped lines per file, so a final line without
+# a trailing newline still matches its newline-terminated counterpart.
+all_sets = ARGV.map { |file|
+  fp = ReadFile.new file
+  s = Set.new
+  while line = fp.gets
+    s << line.chomp
+  end
+  s
+}
+
+# Start from the last set; with no input files fall back to an empty set
+# instead of calling each on nil.
+joint_set = all_sets.pop || Set.new
+all_sets.each { |set| joint_set &= set }
+
+joint_set.each { |i| puts i }
+