summaryrefslogtreecommitdiff
path: root/lib/nlp_ruby/misc.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/nlp_ruby/misc.rb')
-rw-r--r--lib/nlp_ruby/misc.rb74
1 files changed, 74 insertions, 0 deletions
diff --git a/lib/nlp_ruby/misc.rb b/lib/nlp_ruby/misc.rb
index 80d932c..0f58100 100644
--- a/lib/nlp_ruby/misc.rb
+++ b/lib/nlp_ruby/misc.rb
@@ -21,6 +21,40 @@ class Array
end
end
+class String
+
+ def downcase? s
+ s[/[[:lower:]]/]
+ end
+end
+
+class PriorityQueue
+# This assumes that elements in the queue
+# have a numerical member named 'score'.
+
+ def initialize a=Array.new
+ @queue = Array.new a
+ sort!
+ end
+
+ def sort!
+ @queue.sort_by! { |i| -i.score }
+ end
+
+ def pop
+ @queue.pop
+ end
+
+ def push i
+ @queue << i
+ sort!
+ end
+
+ def empty?
+ @queue.empty?
+ end
+end
+
def spawn_with_timeout cmd, t=4, debug=false
require 'timeout'
STDERR.write cmd+"\n" if debug
@@ -37,4 +71,44 @@ def spawn_with_timeout cmd, t=4, debug=false
return pipe_in.read
end
+def read_phrase_table fn
+ table = {}
+ f = ReadFile.new fn
+ while raw_rule = f.gets
+ french, english, features = splitpipe(raw_rule)
+ feature_map = read_feature_string(features)
+ if table.has_key? french
+ table[french] << [english, feature_map ]
+ else
+ table[french] = [[english, feature_map]]
+ end
+ end
+ f.close
+ return table
+end
+
+def cdec_kbest cdec_bin, input, ini, weights, k, unique=true
+ require 'open3'
+ cmd = "echo \"#{input}\" | #{cdec_bin} -c #{ini} -w #{weights} -k #{k}"
+ cmd += " -r" if unique
+ o,_ = Open3.capture2 "#{cmd} 2>/dev/null"
+ a = []; j = -1
+ o.split("\n").map{ |i| j+=1; t=Translation.new; t.from_s(i, false, j); a << t }
+ return a
+end
+
+def read_config fn
+ f = ReadFile.new fn
+ cfg = {}
+ while line = f.gets
+ line.strip!
+ next if /^\s*$/.match line
+ next if line[0]=='#'
+ content = line.split('#', 2).first
+ k, v = content.split(/\s*=\s*/, 2)
+ k.strip!; v.strip!
+ cfg[k] = v
+ end
+ return cfg
+end