summaryrefslogtreecommitdiff
path: root/lib/nlp_ruby/ttable.rb
blob: 14d6c5d701d793002f8bc98ec92db4df676f76e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Loads a phrase table into a Hash keyed by the French side of each rule.
#
# Every line obtained from the reader is split with +splitpipe+ into a French
# side, an English side and a feature string; the feature string is parsed
# with +read_feature_string+. Rules that share a French side are collected,
# in file order, under the same key:
#
#   table['some French string'] = [[english, feature_map], ...]
#
# fn:: passed unchanged to ReadFile.new (presumably a file name -- ReadFile
#      is defined elsewhere, confirm what it accepts).
#
# Returns the Hash. The reader is closed on every exit path, including when
# a line fails to parse.
def read_phrase_table fn
  table = {}
  f = ReadFile.new fn
  while raw_rule = f.gets
    french, english, features = splitpipe(raw_rule)
    feature_map = read_feature_string(features)
    # Create the bucket on first sight of this French side, then append.
    (table[french] ||= []) << [english, feature_map]
  end
  table
ensure
  # f is still nil if ReadFile.new itself raised.
  f.close if f
end

# FIXME
# A single translation hypothesis as it appears in one line of a k-best list.
#
# Attributes:
# id::          integer taken from the first field of the parsed line
# raw::         second field of the line, stripped, exactly as read
# s::           the string derived from +raw+ (alignment markers removed
#               unless disabled in +from_s+)
# f::           whatever +read_feature_string+ returns for the third field
# score::       fourth field converted with +to_f+
# rank::        caller-supplied rank (nil by default)
# other_score:: free slot for a second score; reset to nil by +from_s+
class Translation
  attr_accessor :id, :s, :raw, :f, :score, :rank, :other_score

  # Plain positional constructor; every field defaults to nil.
  def initialize id=nil, raw=nil, s=nil, f=nil, score=nil, rank=nil, other_score=nil
    @id, @raw, @s, @f = id, raw, s, f
    @score, @rank, @other_score = score, rank, other_score
  end

  # Fills this object from one k-best line.
  #
  # t::               the line; split into four fields via splitpipe(t, 3)
  # strip_alignment:: when true, alignment markers such as |0-1| or |2| are
  #                   replaced by a blank and whitespace is collapsed (the way
  #                   moses does it); when false +s+ is the very same String
  #                   object as +raw+
  # rank::            stored as-is in +rank+
  #
  # Returns nil.
  def from_s t, strip_alignment=true, rank=nil
    line_id, surface, feature_str, model_score = splitpipe(t, 3)
    surface.strip!
    @raw = surface
    @s = if strip_alignment
      @raw.gsub(/\s*\|\d+-\d+\||\|-?\d+\|\s*/, ' ').gsub(/\s+/, ' ').strip
    else
      surface
    end
    @id = line_id.to_i
    @f = read_feature_string feature_str
    @score = model_score.to_f
    @rank = rank
    @other_score = nil
  end

  # "id ||| s ||| features ||| score"
  def to_s
    fields = [@id, @s, @f.to_kv, @score]
    fields.join ' ||| '
  end

  # "rank ||| s ||| features ||| score ||| other_score"
  def to_s2
    fields = [@rank, @s, @f.to_kv, @score, @other_score]
    fields.join ' ||| '
  end
end

# Reads a file of k-best entries and groups consecutive lines that share the
# same leading id into one list per id.
#
# For every line a fresh +translation_type+ object is created and filled via
# its +from_s+ method. The grouping key is the first field of the line (as
# returned by +splitpipe+) converted with +to_i+; a new group starts whenever
# that key differs from the previous line's key. After parsing, each object's
# +id+ is overwritten with its 0-based position inside its group.
#
# fn::               passed unchanged to ReadFile.new (presumably a file name
#                    -- ReadFile is defined elsewhere)
# translation_type:: class responding to +new+, whose instances respond to
#                    +from_s+ and +id=+ (defaults to Translation)
#
# Returns an Array of Arrays of +translation_type+ objects, in file order.
# An input without any lines yields an empty Array. The reader is closed on
# every exit path, including when a line fails to parse.
def read_kbest_lists fn, translation_type=Translation
  kbest_lists = []
  cur = []
  prev = nil # key of the previous line; nil until the first line is seen
  f = ReadFile.new fn
  while line = f.gets
    t = translation_type.new
    t.from_s line
    c = splitpipe(line)[0].to_i
    if c != prev
      # Key changed: flush the finished group (if any) and start a new one.
      kbest_lists << cur unless cur.empty?
      cur = []
      prev = c
    end
    t.id = cur.size # position within the current group
    cur << t
  end
  # Flush the last group; skipped for empty input so no empty list is emitted.
  kbest_lists << cur unless cur.empty?
  kbest_lists
ensure
  # f is still nil if ReadFile.new itself raised.
  f.close if f
end