summaryrefslogtreecommitdiff
path: root/tsv-uniq
blob: fde79f27228d4db210b14d2b8fc93f46aeb7bbb2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env ruby

require 'set'

# Removes duplicate rows from a two-column, tab-separated stream read from
# STDIN and writes the surviving rows to STDOUT in their original order.
#
# Usage: tsv-uniq STRICTNESS [SIDE]
#   STRICTNESS  1  keep a row only if its SIDE column was not seen before
#               2  keep a row only if the (source, target) pair was not seen before
#               3  keep a row only if neither the pair nor its SIDE column was seen before
#   SIDE        0 (source column) or 1 (target column); read for modes 1 and 3 only,
#               a missing value is treated as 0

# Splits one input line into its first two tab-separated fields.
#
# Surrounding whitespace of the whole line is stripped before splitting, so a
# leading or trailing tab is dropped as well. Fields past the second are
# ignored; missing fields come back as nil.
#
# @param line [String] one raw input line
# @return [Array<(String, String)>] the [source, target] pair (entries may be nil)
def parse_row(line)
  line.strip.split("\t").values_at(0, 1)
end

# Selects the first occurrence of every row according to the strictness mode.
#
# @param rows [Array<Array>] [source, target] pairs in input order
# @param strictness [Integer] 1, 2 or 3 (see the usage text above)
# @param side [Integer] column index (0 or 1) consulted by modes 1 and 3
# @return [Array<Array>] the rows to keep, in input order; empty for any
#   other strictness value
def uniq_rows(rows, strictness, side = 0)
  seen = Set.new
  case strictness
  when 1, 3
    # Mode 3 additionally asks for an unseen pair, but a pair can only have
    # been seen if its SIDE value was seen with it, so both modes keep exactly
    # the same rows. Set#add? returns nil when the value is already present.
    rows.select { |row| seen.add?(row[side]) }
  when 2
    rows.select { |row| seen.add?(row) }
  else
    []
  end
end

# Command-line entry point: validates the arguments, reads every row from
# +input+ and prints the deduplicated rows to +output+.
#
# @param argv [Array<String>] command-line arguments (STRICTNESS, optional SIDE)
# @param input [IO] stream to read the TSV rows from
# @param output [IO] stream the kept rows are written to
# @param errors [IO] stream the usage message is written to
# @return [Boolean] false when the arguments were rejected, true otherwise
def main(argv, input, output, errors)
  strictness = argv[0].to_i
  # SIDE is only meaningful for the one-sided modes; ignore it otherwise.
  side = [1, 3].include?(strictness) ? argv[1].to_i : 0

  unless [1, 2, 3].include?(strictness) && [0, 1].include?(side)
    # Report bad arguments up front instead of consuming the whole input and
    # then printing nothing (or crashing on an out-of-range column).
    errors.puts 'usage: tsv-uniq STRICTNESS(1|2|3) [SIDE(0|1)] < input.tsv'
    return false
  end

  rows = input.each_line.map { |line| parse_row(line) }
  uniq_rows(rows, strictness, side).each do |src, tgt|
    output.puts "#{src}\t#{tgt}"
  end
  true
end

main(ARGV, STDIN, STDOUT, STDERR) if __FILE__ == $PROGRAM_NAME