summaryrefslogtreecommitdiff
path: root/lib/nlp_ruby/misc.rb
blob: 0319a5fd6b9f43bd9fa04b02d7c8bce1cbd3dc98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
require 'timeout'


# Convenience extensions to the core Array class.
class Array
  # Index of the first occurrence of the largest element.
  # Returns nil for an empty array (max is nil and is not found).
  def max_index
    self.index(self.max)
  end

  # True when every element of this array is also contained in +other+.
  # An empty array is a subset of anything.
  def is_subset_of? other
    self.all? { |i| other.include? i }
  end

  # Folds the elements together with +.
  # NOTE: this replaces the built-in Array#sum of newer Rubies; unlike the
  # built-in it returns nil (not 0) for an empty array and also works for
  # non-numeric elements such as strings.
  def sum
    self.inject(:+)
  end

  # Arithmetic mean as a Float. For an empty array this is 0.0/0, i.e. NaN.
  def mean
    self.sum.to_f/self.size
  end
end

class String

  # Looks for a lowercase letter in the string.
  # Returns the first lowercase character found (truthy), or nil when the
  # string contains none.
  def downcase?
    slice(/[[:lower:]]/)
  end
end

# Queue kept ordered by descending score.
# Every element is expected to respond to `score` with a number.
class PriorityQueue

  # Builds the queue from a copy of +a+ and puts it in order.
  def initialize(a = [])
    @queue = Array.new(a)
    sort!
  end

  # Re-orders the backing array so the highest score comes first
  # and the lowest score sits at the tail.
  def sort!
    @queue.sort_by! { |element| -element.score }
  end

  # Removes and returns the tail element (the one with the lowest score),
  # or nil when the queue is empty.
  def pop
    @queue.pop
  end

  # Appends +i+ and restores the ordering; returns the sorted backing array.
  def push(i)
    @queue.push(i)
    sort!
  end

  # True when no elements are queued.
  def empty?
    @queue.length.zero?
  end
end

# Runs +cmd+ as a child process and returns everything it wrote to stdout.
#
# cmd         - command line handed to Process.spawn.
# t           - seconds to wait for the child before giving up.
# ignore_fail - when true, a child that exceeds the timeout is NOT killed.
# debug       - when true, the command line is echoed to STDERR first.
#
# Returns the captured stdout as a String (possibly partial when the child
# was killed after the timeout).
def spawn_with_timeout cmd, t=4, ignore_fail=false, debug=false
  STDERR.write cmd+"\n" if debug
  pipe_in, pipe_out = IO.pipe
  pid = Process.spawn(cmd, :out => pipe_out)
  # The parent must drop its copy of the write end right away, otherwise the
  # reader below would never see end-of-file.
  pipe_out.close
  # Drain stdout concurrently: a child producing more than one pipe buffer
  # would otherwise block on write and always run into the timeout.
  reader = Thread.new { pipe_in.read }
  begin
    Timeout.timeout(t) { Process.wait pid }
  rescue Timeout::Error
    if ignore_fail
      # Child keeps running; let Ruby reap it once it exits.
      Process.detach pid
    else
      Process.kill('TERM', pid)
      # Reap the killed child so it does not linger as a zombie.
      Process.wait pid
    end
  end
  output = reader.value
  pipe_in.close
  return output
end

# Loads a phrase table from the file +fn+.
#
# Each line read through ReadFile is broken up by splitpipe into three
# fields (source phrase, target phrase, feature string); the feature string
# is converted with SparseVector.from_kv.
#
# Returns a Hash mapping each source phrase to an Array of
# [target phrase, feature map] pairs, in file order.
def read_phrase_table fn
  rules_by_source = {}
  reader = ReadFile.new fn
  while (raw_rule = reader.gets)
    french, english, features = splitpipe(raw_rule)
    entry = [english, SparseVector.from_kv(features)]
    # First rule for a source phrase starts a fresh list.
    (rules_by_source[french] ||= []) << entry
  end
  reader.close
  rules_by_source
end

# Runs the cdec decoder on +input+ and returns its k-best list.
#
# cdec_bin - decoder command.
# input    - text to decode; sent to the decoder on stdin.
# ini      - value for the decoder's -c option.
# weights  - value for the decoder's -w option.
# k        - value for the decoder's -k option.
# unique   - when true, -r is appended to the command line.
#
# Returns an Array with one Translation per output line; each is filled via
# Translation#from_s(line, false, index) with a zero-based line index.
def cdec_kbest cdec_bin, input, ini, weights, k, unique=true
  require 'open3'
  cmd = "#{cdec_bin} -c #{ini} -w #{weights} -k #{k}"
  cmd += " -r" if unique
  # Feed the input over stdin instead of interpolating it into
  # `echo "..."`: quotes, `$` and backticks in the text would otherwise be
  # interpreted by the shell.
  o,_ = Open3.capture2 "#{cmd}  2>/dev/null", :stdin_data => "#{input}\n"
  o.split("\n").each_with_index.map { |line, j|
    t = Translation.new
    t.from_s(line, false, j)
    t
  }
end

# Parses a simple `key = value` configuration file.
#
# Blank lines and lines starting with '#' are skipped; anything after a '#'
# on a line is dropped. Keys and values are split on the first '=' and
# stripped of surrounding whitespace.
#
# fn - path handed to ReadFile.
#
# Returns a Hash of String keys to String values.
# Raises ArgumentError for a non-comment line that contains no '='.
def read_config fn
  f = ReadFile.new fn
  cfg = {}
  begin
    while line = f.gets
      line.strip!
      next if line.empty?
      next if line[0]=='#'
      content = line.split('#', 2).first
      k, v = content.split(/\s*=\s*/, 2)
      # Without an '=' there is no value; fail with a message naming the line
      # rather than a NoMethodError on nil.
      raise ArgumentError, "malformed config line (expected key = value): #{line}" if v.nil?
      cfg[k.strip] = v.strip
    end
  ensure
    # Release the file handle even when parsing fails.
    f.close
  end
  return cfg
end