#!/usr/bin/env ruby

require 'trollop'
require 'tempfile'
require 'open3'
require 'shellwords'

# --- execute ---------------------------------------------------------------

# Shell command of the semantic-parsing decoder (reads stdin, writes stdout).
SMT_SEMPARSE = '/workspace/grounded/mosesdecoder/moses-chart-cmd/bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/moses_chart -f /workspace/grounded/smt-semparse/latest/model/moses.ini 2>/dev/null'.freeze
# Prolog program loaded by swipl to evaluate a functionalized query.
EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'.freeze

# Pipes a natural-language string through ./stem.py and the SMT_SEMPARSE
# decoder, functionalizes the result with ./functionalize.py, evaluates it
# with swipl/EVAL_PL and prints every intermediate result.
#
# NOTE: this top-level definition shadows Kernel#exec for the whole script;
# the name is kept because the rest of the file calls it.
#
# @param natural_language_string [String] sentence to parse and execute
# @param reference_output [String] expected answer to compare against
# @return [Array] [matches_reference (true/false), func, res]; res is nil
#   when the swipl output contains no "X = " part
def exec(natural_language_string, reference_output)
  # Shell-escape everything that is interpolated into a command line so that
  # quotes, `$` or backticks inside the data cannot alter the command.
  flat_mrl = `echo #{Shellwords.escape(natural_language_string)} | ./stem.py | #{SMT_SEMPARSE}`.strip
  func = `echo #{Shellwords.escape(flat_mrl)} | ./functionalize.py 2>/dev/null`.strip
  query = "execute_funql_query(#{func}, X)."
  res = `echo #{Shellwords.escape(query)} | swipl -s #{EVAL_PL} 2>&1 | grep "X ="`.strip.split('X = ')[1]
  puts " nrl: #{natural_language_string}"
  puts "flat mrl: #{flat_mrl}"
  puts " func: #{func}"
  puts " output: #{res}"
  [res == reference_output, func, res]
end

# --- decoder interaction/translations --------------------------------------

# One entry of a k-best list.
#
# A k-best line is split on ' ||| ': field 1 is the translation string,
# field 2 a whitespace-separated list of name=value features and field 3 the
# model score. Field 0 is not used here (presumably the sentence id -- TODO
# confirm against the decoder output).
class Translation
  attr_accessor :s, :f, :rank, :model, :score

  # @param kbest_line [String] one raw line of the decoder's k-best output
  # @param rank [Integer] position of the entry in the k-best list
  def initialize(kbest_line, rank = -1)
    fields = kbest_line.split ' ||| '
    @s = fields[1].strip
    features = {}
    fields[2].split.each do |pair|
      name, value = pair.split '='
      features[name] = value.to_f
    end
    @f = NamedSparseVector.new features
    @rank = rank
    @model = fields[3].to_f
    @score = -1.0 # placeholder until score_translations assigns a score
  end

  def to_s
    "#{@rank} ||| #{@s} ||| #{@model} ||| #{@score} ||| #{@f.to_s}"
  end
end

CDEC = '/toolbox/cdec-dtrain/bin/cdec -r'.freeze

# Runs the decoder on one sentence and parses its k-best output.
#
# @param s [String] input sentence
# @param k [Integer] value passed to the decoder's -k option
# @param ini [String] path passed to the decoder's -c option
# @param w [String] path passed to the decoder's -w option
# @return [Array<Translation>] one Translation per output line, ranked from 0
def predict_translation(s, k, ini, w)
  # The sentence is shell-escaped; the original interpolated it raw.
  cmd = "echo #{Shellwords.escape(s)} | #{CDEC} -c #{ini} -k #{k} -w #{w} 2>/dev/null"
  out, _status = Open3.capture2(cmd)
  out.split("\n").each_with_index.map { |line, rank| Translation.new(line, rank) }
end

# --- scoring (per-sentence BLEU) -------------------------------------------

# Yields every n-gram of order 1..n of the whitespace-tokenized string s,
# as an array of tokens. When fix is true nothing is yielded (behavior of
# the original condition, kept as is).
def ngrams_it(s, n, fix = false)
  a = s.strip.split
  a.each_with_index do |tok, i|
    tok.strip!
    0.upto([n - 1, a.size - i - 1].min) do |m|
      yield a[i..i + m] if !(fix || (a[i..i + m].size > n))
    end
  end
end

# Brevity penalty for hypothesis h against reference r (both strings):
# 1.0 if h has more tokens than r, else exp(1 - |r|/|h|).
def brevity_penalty(h, r)
  a = h.split
  b = r.split
  return 1.0 if a.size > b.size
  Math.exp(1.0 - b.size.to_f / a.size)
end

# Per-sentence BLEU of hypothesis h against reference r.
#
# Orders above the reference length are ignored, and n-gram orders >= 2 use
# add-one smoothing on the precision.
#
# @param h [String] hypothesis
# @param r [String] reference
# @param n [Integer] maximum n-gram order
# @return [Float] score; 0.0 when hypothesis or reference has no tokens
def per_sentence_bleu(h, r, n = 4)
  # An empty side would otherwise produce NaN (0/0) below.
  return 0.0 if h.split.empty? || r.split.empty?

  h_ng = {}
  r_ng = {}
  1.upto(n) do |i|
    h_ng[i] = []
    r_ng[i] = []
  end
  ngrams_it(h, n) { |i| h_ng[i.size] << i }
  ngrams_it(r, n) { |i| r_ng[i.size] << i }
  m = [n, r.split.size].min
  weight = 1.0 / m
  add = 0.0
  sum = 0
  1.upto(m) do |i|
    counts_sum = h_ng[i].size
    hyp_counts = Hash.new(0)
    h_ng[i].each { |gram| hyp_counts[gram] += 1 }
    ref_counts = Hash.new(0)
    r_ng[i].each { |gram| ref_counts[gram] += 1 }
    # Clip each hypothesis n-gram count by its count in the reference; the
    # original added the raw reference count, which let precision exceed 1.
    counts_clipped = 0
    hyp_counts.each_pair { |gram, c| counts_clipped += [c, ref_counts[gram]].min }
    add = 1.0 if i >= 2
    sum += weight * Math.log((counts_clipped + add) / (counts_sum + add))
  end
  brevity_penalty(h, r) * Math.exp(sum)
end

# Assigns each translation in a its per-sentence BLEU against reference.
# Returns a.
def score_translations(a, reference)
  a.each { |t| t.score = per_sentence_bleu t.s, reference }
end

# --- hope and fear ---------------------------------------------------------

# Selects the 'hope' (max model + score) or 'fear' (max model - score)
# translation; ties go to the earliest entry.
#
# @param a [Array<Translation>] scored k-best list
# @param act [String] 'hope' or 'fear'
# @return [Translation, nil] the selected entry; the last entry when no
#   value compares greater than -Infinity (e.g. all NaN); nil for an empty list
# @raise [ArgumentError] if act is neither 'hope' nor 'fear'
def hope_and_fear(a, act = 'hope')
  unless %w[hope fear].include?(act)
    raise ArgumentError, "unknown action '#{act}' (expected 'hope' or 'fear')"
  end

  sign = act == 'hope' ? 1.0 : -1.0
  best = nil
  best_value = -Float::INFINITY
  a.each do |t|
    value = t.model + sign * t.score
    next unless value > best_value
    best = t
    best_value = value
  end
  best || a.last
end

# --- update ----------------------------------------------------------------

# Returns w + (hope.f - fear.f) as a new vector.
def update(w, hope, fear)
  w + (hope.f - fear.f)
end

# --- weights ---------------------------------------------------------------

# Sparse vector keyed by feature name; missing keys read as 0.0.
class NamedSparseVector
  attr_accessor :h

  # @param init [Hash, nil] initial contents; the hash is used directly (not
  #   copied) and its default value is set to 0.0
  def initialize(init = nil)
    @h = {}
    @h = init if init
    @h.default = 0.0
  end

  # Element-wise sum; returns a new vector.
  def +(other)
    ret = NamedSparseVector.new(@h.dup)
    other.each_pair { |k, v| ret[k] += v }
    ret
  end

  # Reads "name value" lines from file fn into this vector, skipping blank
  # lines. The file is closed when reading finishes.
  def from_file(fn)
    File.foreach(fn) do |line|
      name, value = line.strip.split
      next if name.nil?
      @h[name] = value.to_f
    end
  end

  # Serializes the vector as "name value" lines. Despite the name this only
  # returns the string; the caller writes it.
  def to_file
    @h.map { |k, v| "#{k} #{v}" }.join("\n") + "\n"
  end

  # Element-wise difference; returns a new vector.
  def -(other)
    ret = NamedSparseVector.new(@h.dup)
    other.each_pair { |k, v| ret[k] -= v }
    ret
  end

  # Scales every component; returns a new vector.
  # @raise [ArgumentError] if scalar is not Numeric
  def *(scalar)
    raise ArgumentError, "Arg is not numeric #{scalar}" unless scalar.is_a? Numeric
    ret = NamedSparseVector.new
    @h.each_key { |k| ret[k] = @h[k] * scalar }
    ret
  end

  # Dot product over the keys of this vector.
  def dot(other)
    sum = 0.0
    @h.each_pair { |k, v| sum += v * other[k] }
    sum
  end

  def [](k)
    @h[k]
  end

  def []=(k, v)
    @h[k] = v
  end

  def each_pair
    @h.each_pair { |k, v| yield k, v }
  end

  def to_s
    @h.to_s
  end

  def size
    @h.keys.size
  end
end

# Debug helper (not called by main): decodes the first input sentence with
# the initial weights and prints the k-best list plus hope and fear.
# Shadows Kernel#test; the name is kept for compatibility.
#
# @param opts [Hash] needs :init_weights, :input, :references, :k, :cdec_ini
def test(opts)
  w = NamedSparseVector.new
  w.from_file opts[:init_weights]
  input = File.readlines(opts[:input]).map(&:strip)
  references = File.readlines(opts[:references]).map(&:strip)
  File.open('weights.tmp', 'w+') { |f| f.write w.to_file }
  # The original call omitted the decoder config argument (3 args for 4).
  kbest = predict_translation input[0], opts[:k], opts[:cdec_ini], 'weights.tmp'
  score_translations kbest, references[0]
  kbest.each { |t| puts "#{t.rank} #{t.s} #{t.model} #{t.score}" }
  puts
  puts "hope"
  hope = hope_and_fear kbest, 'hope'
  puts "#{hope.rank} #{hope.s} #{hope.model} #{hope.score}"
  puts "fear"
  fear = hope_and_fear kbest, 'fear'
  puts "#{fear.rank} #{fear.s} #{fear.model} #{fear.score}"
end

# Min-max normalizes the model scores of a in place to [0, 1]. When all
# scores are equal every score becomes 0.0 (the original produced NaN).
# Returns a.
def adj_model(a)
  return a if a.empty?

  models = a.map(&:model)
  min = models.min
  range = models.max - min
  a.each { |t| t.model = range.zero? ? 0.0 : (t.model - min) / range }
end

# Writes the weights w to a temporary file, yields its path and removes the
# file afterwards. Returns the block's value.
def with_weights_file(w)
  tmp_file = Tempfile.new('rampion')
  tmp_file.write w.to_file
  tmp_file.close
  yield tmp_file.path
ensure
  tmp_file.unlink if tmp_file
end

# part/total as a percentage rounded to 2 places; 0.0 when total is zero
# (avoids rounding NaN, which raises FloatDomainError).
def percentage(part, total)
  return 0.0 if total.zero?
  ((part.to_f / total) * 100).round 2
end

# Training loop: for every input sentence decode a k-best list, execute the
# top-1 translation to obtain feedback, choose hope and fear translations and
# update the weights with their feature difference. Prints statistics at the
# end.
def main
  opts = Trollop::options do
    opt :k, "k", :type => :int, :required => true
    opt :input, "'foreign' input", :type => :string, :required => true
    opt :references, "(parseable) references", :type => :string, :required => true
    # :required was misspelled :require for the next two options, so they
    # were not enforced although main cannot run without them.
    opt :gold, "gold standard parser output", :type => :string, :required => true
    opt :gold_mrl, "gold standard mrl", :type => :string, :short => '-h', :required => true
    opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w'
    opt :cdec_ini, "cdec config file", :type => :string, :default => './cdec.ini'
  end

  input = File.readlines(opts[:input]).map(&:strip)
  references = File.readlines(opts[:references]).map(&:strip)
  gold = File.readlines(opts[:gold]).map(&:strip)
  gold_mrl = File.readlines(opts[:gold_mrl]).map(&:strip)

  # init weights
  w = NamedSparseVector.new
  w.from_file opts[:init_weights]

  positive_feedback = 0
  without_translations = 0
  with_proper_parse = 0
  with_output = 0
  count = 0

  input.each_with_index do |sentence, j|
    count += 1

    # get kbest list for current input, using the current weights
    kbest = with_weights_file(w) do |path|
      predict_translation sentence, opts[:k], opts[:cdec_ini], path
    end
    if kbest.empty? # FIXME: shouldnt happen
      without_translations += 1
      next
    end
    score_translations kbest, references[j]
    adj_model kbest

    # get feedback
    puts "----top1"
    puts "0 #{kbest[0].s} #{kbest[0].model} #{kbest[0].score}"
    feedback, func, output = exec(kbest[0].s, gold[j])
    with_proper_parse += 1 if func != "None"
    with_output += 1 if output != "null"
    positive_feedback += 1 if feedback

    if feedback
      # The top-1 translation produced the gold output: adopt it as the new
      # reference and use it as hope.
      puts "'#{kbest[0].s}'"
      references[j] = kbest[0].s
      hope = kbest[0]
    else
      hope = hope_and_fear kbest, 'hope'
    end
    fear = hope_and_fear kbest, 'fear'

    puts "----hope"
    puts "#{hope.rank} #{hope.s} #{hope.model} #{hope.score}"
    exec(hope.s, gold[j])
    puts "----fear"
    puts "#{fear.rank} #{fear.s} #{fear.model} #{fear.score}"
    exec(fear.s, gold[j])
    puts "----reference"
    puts "// #{references[j]} // 1.0"
    exec(references[j], gold[j])
    puts "GOLD MRL: #{gold_mrl[j]}"
    puts "GOLD OUTPUT #{gold[j]}"
    puts

    w = update w, hope, fear
  end

  puts "#{count} examples"
  puts "#{percentage(positive_feedback, count)}% with positive feedback (abs: #{positive_feedback})"
  puts "#{percentage(with_proper_parse, count)}% with proper parse (abs: #{with_proper_parse})"
  puts "#{percentage(with_output, count)}% with output (abs: #{with_output})"
  puts "#{percentage(without_translations, count)}% without translations (abs: #{without_translations})"
end

main