diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2014-02-13 11:21:34 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2014-02-13 11:21:34 +0100 |
commit | cbafa90cb1a6b363b797c0f889c1c35749dee874 (patch) | |
tree | 5aad78c4629654a375c021d85689b6524da96525 | |
parent | aefd923601d6457103069ebda91abc4caae297f8 (diff) |
finished refactoring
-rw-r--r-- | hopefear.rb | 193 | ||||
-rwxr-xr-x | rampfion.rb | 819 |
2 files changed, 414 insertions, 598 deletions
diff --git a/hopefear.rb b/hopefear.rb new file mode 100644 index 0000000..0423d26 --- /dev/null +++ b/hopefear.rb @@ -0,0 +1,193 @@ +def hope_and_fear kbest, action + max = -1.0/0 + max_idx = -1 + kbest.each_with_index { |i,j| + if action=='hope' && i.score + i.other_score > max + max_idx = j; max = i.score + i.other_score + end + if action=='fear' && i.score - i.other_score > max + max_idx = j; max = i.score - i.other_score + end + } + return kbest[max_idx] +end + +def gethopefear_standard kbest, feedback + hope = fear = nil + type1 = type2 = false + if feedback == true + hope = kbest[0] + type1 = true + else + hope = hope_and_fear(kbest, 'hope') + type2 = true + end + fear = hope_and_fear(kbest, 'fear') + return hope, fear, false, type1, type2 +end + +def gethopefear_fear_no_exec kbest, feedback, gold, max + hope = fear = nil + type1 = type2 = false + if feedback == true + hope = kbest[0] + type1 = true + else + hope = hope_and_fear(kbest, 'hope') + type2 = true + end + kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}.each_with_index { |k,i| + break if i==max + if !exec(k.s, gold, true)[0] + fear = k + break + end + } + skip=true if !fear + return hope, fear, skip, type1, type2 +end + +def gethopefear_fear_no_exec_skip kbest, feedback, gold + hope = fear = nil + type1 = type2 = false + if feedback == true + hope = kbest[0] + type1 = true + else + hope = hope_and_fear(kbest, 'hope') + type2 = true + end + fear = hope_and_fear(kbest, 'fear') + skip = exec(fear.s, gold, true)[0] + return hope, fear, skip, type1, type2 +end + +def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max + hope = fear = nil; hope_idx = 0 + type1 = type2 = false + sorted_kbest = kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)} + if feedback == true + hope = kbest[0] + type1 = true + else + sorted_kbest.each_with_index { |k,i| + next if i==0 + break if i==max + if exec(k.s, gold, true)[0] + hope_idx = i + hope = k + break + end + } + type2 = true + end + sorted_kbest.each_with_index { |k,i| + break if i>(kbest.size-(hope_idx+1))||i==max + if !exec(k.s, gold, true)[0] + fear = k + break + end + } + skip = true if !hope||!fear + return hope, fear, skip, type1, type2 +end + +def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max + hope = fear = nil + type1 = type2 = false + if feedback == true + hope = kbest[0] + type1 = true + else + hope = hope_and_fear(kbest, 'hope') + type2 = true + end + fear = hope_and_fear(kbest, 'fear') + skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0] + return hope, fear, skip, type1, type2 +end + + +def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil + hope = fear = nil; hope_idx = 0; new_reference = nil + type1 = type2 = false + if feedback == true + hope = kbest[0] + new_reference = hope + type1 = true + elsif own_reference + hope = own_reference + type1 = true + else + kbest.each_with_index { |k,i| + next if i==0 + break if i==max + if exec(k.s, gold, true)[0] + hope_idx = i + hope = k + break + end + } + type2 = true + end + kbest.each_with_index { |k,i| + next if i==0||i==hope_idx + break if i==max + if !exec(k.s, gold, true)[0] + fear = k + break + end + } + skip = true if !hope||!fear + return hope, fear, skip, type1, type2, new_reference +end + +def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil + hope = fear = nil; hope_idx = 0; new_reference = nil + type1 = type2 = false + if feedback == true + hope = kbest[0] + new_reference = hope + type1 = true + elsif own_reference + hope = own_reference + type1 = true + else + kbest.each_with_index { |k,i| + next if i==0 + break if i==max + if exec(k.s, gold, true)[0] + hope_idx = i + hope = k + break + end + } + type2 = true + end + kbest.each_with_index { |k,i| + next if i==0||i==hope_idx + break if i==max + if !exec(k.s, gold, true)[0] + fear = k + break + end + } + skip = true if !hope||!fear + return hope, fear, skip, type1, type2, new_reference +end + +def gethopefear_rampion kbest, reference + hope = fear = nil + type1 = type2 = false + if kbest[0].s == reference + hope = kbest[0] + fear = hope_and_fear(kbest, 'fear') + type1 = true + else + hope = hope_and_fear(kbest, 'hope') + fear = kbest[0] + type2 = true + end + return hope, fear, false, type1, type2 +end + diff --git a/rampfion.rb b/rampfion.rb index 24a6497..3ff216e 100755 --- a/rampfion.rb +++ b/rampfion.rb @@ -1,35 +1,16 @@ #!/usr/bin/env ruby +require 'nlp_ruby' require 'trollop' require 'tempfile' -require 'open3' require 'memcached' -require 'timeout' +require_relative './hopefear' SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset 2>/dev/null' EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl' -CDEC = "/toolbox/cdec-dtrain/bin/cdec" - $cache = Memcached.new("localhost:11211") -# the semantic parser hangs sometimes -def spawn_with_timeout cmd, t=4, debug=false - puts cmd if debug - pipe_in, pipe_out = IO.pipe - pid = Process.spawn(cmd, :out => pipe_out) - begin - Timeout.timeout(t) { Process.wait pid } - rescue Timeout::Error - return "" - # accept the zombies - #Process.kill('TERM', pid) - end - pipe_out.close - return pipe_in.read -end - -# execute def exec natural_language_string, reference_output, no_output=false func = nil output = nil @@ -55,621 +36,263 @@ def exec natural_language_string, reference_output, no_output=false $cache.delete key_prefix+"__FEEDBACK" end end - puts " nrl: #{natural_language_string}" if !no_output - puts " mrl: #{func}" if !no_output - puts " output: #{output}" if !no_output - puts " correct?: #{feedback}" if !no_output + STDERR.write " nrl: #{natural_language_string}\n" if !no_output + STDERR.write " mrl: #{func}\n" if !no_output + STDERR.write " output: #{output}\n" if !no_output + STDERR.write " correct?: #{feedback}\n" if !no_output return feedback, func, output end -# decoder interaction/translations -class Translation - attr_accessor :s, :f, :rank, :model, :score - - def initialize kbest_line, rank=-1 - a = kbest_line.split ' ||| ' - @s = a[1].strip - h = {} - a[2].split.each { |i| - name, value = i.split '=' - value = value.to_f - h[name] = value - } - @f = NamedSparseVector.new h - @rank = rank - @model = a[3].to_f - @score = -1.0 - end - - def to_s - "#{@rank} ||| #{@s} ||| #{@model} ||| #{@score} ||| #{@f.to_s}" - end -end - -def predict_translation s, k, ini, w - o, s = Open3.capture2 "echo \"#{s}\" | #{CDEC} -c #{ini} -r -k #{k} -w #{w} 2>/dev/null" - j = -1 - return o.split("\n").map{|i| j+=1; Translation.new(i, j)} -end - -# scoring (per-sentence BLEU) -def ngrams_it(s, n, fix=false) - a = s.strip.split - a.each_with_index { |tok, i| - tok.strip! - 0.upto([n-1, a.size-i-1].min) { |m| - yield a[i..i+m] if !(fix||(a[i..i+m].size>n)) - } - } -end - -def brevity_penalty hypothesis, reference - a = hypothesis.split; b = reference.split - return 1.0 if a.size>b.size - return Math.exp(1.0 - b.size.to_f/a.size); -end - -def per_sentence_bleu hypothesis, reference, n=4 - h_ng = {}; r_ng = {} - (1).upto(n) {|i| h_ng[i] = []; r_ng[i] = []} - ngrams_it(hypothesis, n) {|i| h_ng[i.size] << i} - ngrams_it(reference, n) {|i| r_ng[i.size] << i} - m = [n, reference.split.size].min - weight = 1.0/m - add = 0.0 - sum = 0 - (1).upto(m) { |i| - counts_clipped = 0 - counts_sum = h_ng[i].size - h_ng[i].uniq.each {|j| counts_clipped += r_ng[i].count(j)} - add = 1.0 if i >= 2 - sum += weight * Math.log((counts_clipped + add)/(counts_sum + add)); - } - return brevity_penalty(hypothesis, reference) * Math.exp(sum) -end - -def score_translations list_of_translations, reference - list_of_translations.each { |i| i.score = per_sentence_bleu i.s, reference} -end - -# hope and fear -def hope_and_fear kbest, action - max = -1.0/0 - max_idx = -1 - kbest.each_with_index { |i,j| - if action=='hope' && i.model + i.score > max - max_idx = j; max = i.model + i.score - end - if action=='fear' && i.model - i.score > max - max_idx = j; max = i.model - i.score - end - } - return kbest[max_idx] -end - -# update -def update w, hope, fear, eta - diff = hope.f - fear.f - diff *= eta - w += diff - return w -end - -# weights -class NamedSparseVector - attr_accessor :h - - def initialize init=nil - @h = {} - @h = init if init - @h.default = 0.0 - end - - def + other - new_h = Hash.new - new_h.update @h - ret = NamedSparseVector.new new_h - other.each_pair { |k,v| ret[k]+=v } - return ret - end - - def from_file fn - f = File.new(fn, 'r') - while line = f.gets - name, value = line.strip.split - value = value.to_f - @h[name] = value - end - end - - def to_file - s = [] - @h.each_pair { |k,v| s << "#{k} #{v}" } - s.join("\n")+"\n" - end - - def - other - new_h = Hash.new - new_h.update @h - ret = NamedSparseVector.new new_h - other.each_pair { |k,v| ret[k]-=v } - return ret - end - - def * scalar - raise ArgumentError, "Arg is not numeric #{scalar}" unless scalar.is_a? Numeric - ret = NamedSparseVector.new - @h.keys.each { |k| ret[k] = @h[k]*scalar } - return ret - end - - def dot other - sum = 0.0 - @h.each_pair { |k,v| - sum += v * other[k] - } - return sum - end - - def [] k - @h[k] - end - - def []= k, v - @h[k] = v - end - - def each_pair - @h.each_pair { |k,v| yield k,v } - end - - def to_s - @h.to_s - end - - def length - Math.sqrt(@h.values.map{|i|i*i}.inject(:+)) - end - - def normalize! - l = length - @h.each_pair { |k,v| - @h[k] = v/l - } - end - - def size - @h.keys.size - end -end - -# map models score to [0,1] -def adj_model kbest, factor - min = kbest.map{|i|i.model}.min - max = kbest.map{|i|i.model}.max - kbest.each {|i| i.model = factor*((i.model-min)/(max-min))} -end - class Stats + def initialize name @name = name @with_parse = 0.0 - @with_output = 0.0 - @correct_output = 0.0 + @with_output = 0.0 + @with_correct_output = 0.0 end + # FIXME def update feedback, func, output - @with_parse +=1 if func!="None"&&func!='' - @with_output +=1 if output!="null"&&output!='' - @correct_output += 1 if feedback==true + @with_parse +=1 if func!='None'&&func!='' + @with_output +=1 if output!='null'&&output!='' + @with_correct_output += 1 if feedback==true end - def print total + def to_s total without_parse = total-@with_parse <<-eos - [#{@name}] - #{@name} with parse #{((@with_parse/total)*100).round 2} abs:#{@with_parse} - #{@name} with output #{((@with_output/total)*100).round 2} abs:#{@with_output} -#{@name} with correct output #{((@correct_output/total)*100).round 2} adj:#{((@correct_output/(total-without_parse))*100).round 2} abs:#{@correct_output} + #{@name} with parse #{((@with_parse/total)*100).round 2}% abs=#{@with_parse} + #{@name} with output #{((@with_output/total)*100).round 2}% abs=#{@with_output} +#{@name} with correct output #{((@with_correct_output/total)*100).round 2}% adj=#{((@with_correct_output/(total-without_parse))*100).round 2} abs=#{@with_correct_output} eos end end -def _print rank, string, model, score - puts "rank=#{rank} string='#{string}' model=#{model} score=#{score}" +# map model scores to lie within [0,1] +def adjust_model_scores kbest, factor + min = kbest.map{ |k| k.score }.min + max = kbest.map{ |k| k.score }.max + kbest.each { |k| k.score = factor*((k.score-min)/(max-min)) } end -def bag_of_words s, stopwords=[] - s.split.uniq.sort.reject{|v| stopwords.include? v} -end - -def gethopefear_standard kbest, feedback - hope = fear = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - type1 = true - else - hope = hope_and_fear(kbest, 'hope') - type2 = true - end - fear = hope_and_fear(kbest, 'fear') - return hope, fear, false, type1, type2 +def update model, hope, fear, eta + diff = hope.f - fear.f + diff *= eta + model += diff + return model end -def gethopefear_fear_no_exec kbest, feedback, gold, max - hope = fear = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - type1 = true - else - hope = hope_and_fear(kbest, 'hope') - type2 = true - end - kbest.sort{|x,y|(y.model+y.score)<=>(x.model+x.score)}.each_with_index { |k,i| - break if i==max - if !exec(k.s, gold, true)[0] - fear = k - break - end - } - skip=true if !fear - return hope, fear, skip, type1, type2 -end +def main + cfg = Trollop::options do + # data + opt :k, "k", :type => :int, :default => 10000, :short => '-k' + opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i' + opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r' + opt :gold, "gold output", :type => :string, :required => true, :short => '-g' + opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h' + opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w' + opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c' + # output + opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o' + opt :debug, "debug output", :type => :bool, :default => false, :short => '-d' + opt :print_kbest, "print full kbest lists", :type => :bool, :default => false, :short => '-l' + # learning parameters + opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e' + opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j' + opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s' + opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m' + opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n' + opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x' + opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y' + opt :hope_fear_max, "FIXME", :type => :int, :default => 32, :short => '-q' + opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v' + end + + STDERR.write "CONFIGURATION\n" + cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" } + + input = ReadFile.new(cfg[:input]).readlines_strip + references = ReadFile.new(cfg[:references]).readlines_strip + gold = ReadFile.new(cfg[:gold]).readlines_strip + gold_mrl = ReadFile.new(cfg[:gold_mrl]).readlines_strip # FIXME => prolog! + stopwords = ReadFile.new('prototype/d/stopwords.en').readlines_strip -def gethopefear_fear_no_exec_skip kbest, feedback, gold - hope = fear = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - type1 = true - else - hope = hope_and_fear(kbest, 'hope') - type2 = true - end - fear = hope_and_fear(kbest, 'fear') - skip = exec(fear.s, gold, true)[0] - return hope, fear, skip, type1, type2 -end + own_references = nil + own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec' + + w = SparseVector.new + w.from_kv_file cfg[:init_weights] + last_weights_fn = '' + + cfg[:iterate].times { |iter| + + # numerous counters + count = 0 + without_translation = 0 + no_proper_gold_output = 0 + top1_stats = Stats.new 'top1' + hope_stats = Stats.new 'hope' + fear_stats = Stats.new 'fear' + refs_stats = Stats.new 'refs' + type1_updates = 0 + type2_updates = 0 + top1_hit = 0 + top1_variant = 0 + top1_true_variant = 0 + hope_hit = 0 + hope_variant = 0 + hope_true_variant = 0 + kbest_sz = 0 + + input.each_with_index { |i,j| + count += 1 + + tmp_file = Tempfile.new('rampion') + tmp_file_path = tmp_file.path + last_weights_fn = tmp_file.path + tmp_file.write w.to_kv ' ' + tmp_file.close + + kbest = CDEC::kbest i, cfg[:cdec_ini], tmp_file_path, cfg[:k] + kbest_sz += kbest.size + + STDERR.write "\n=================\n" + STDERR.write " EXAMPLE: #{j}\n" + STDERR.write " GOLD MRL: #{gold_mrl[j]}\n" + STDERR.write "GOLD OUTPUT: #{gold[j]}\n" + + if kbest.size == 0 + without_translation += 1 + STDERR.write "NO MT OUTPUT, skipping example\n" + next + end -def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max - hope = fear = nil; hope_idx = 0 - type1 = type2 = false - sorted_kbest = kbest.sort{|x,y|(y.model+y.score)<=>(x.model+x.score)} - if feedback == true - hope = kbest[0] - type1 = true - else - sorted_kbest.each_with_index { |k,i| - next if i==0 - break if i==max - if exec(k.s, gold, true)[0] - hope_idx = i - hope = k - break + if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].' + no_proper_gold_output += 1 + if cfg[:skip_on_no_proper_gold] + STDERR.write "NO PROPER GOLD OUTPUT, skipping example\n" + next + end end - } - type2 = true - end - sorted_kbest.each_with_index { |k,i| - break if i>(kbest.size-(hope_idx+1))||i==max - if !exec(k.s, gold, true)[0] - fear = k - break - end - } - skip = true if !hope||!fear - return hope, fear, skip, type1, type2 -end -def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max - hope = fear = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - type1 = true - else - hope = hope_and_fear(kbest, 'hope') - type2 = true - end - fear = hope_and_fear(kbest, 'fear') - skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0] - return hope, fear, skip, type1, type2 -end + kbest.each { |k| k.other_score = BLEU::per_sentence_bleu k.s, references[j] } + if cfg[:print_kbest] + STDERR.write "\n<<< KBEST\n" + kbest.each_with_index { |k,l| STDERR.write k.to_s+"\n" } + STDERR.write ">>>\n" + end -def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil - hope = fear = nil; hope_idx = 0; new_reference = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - new_reference = hope - type1 = true - elsif own_reference - hope = own_reference - type1 = true - else - kbest.each_with_index { |k,i| - next if i==0 - break if i==max - if exec(k.s, gold, true)[0] - hope_idx = i - hope = k - break + adjust_model_scores kbest, cfg[:scale_model] + + STDERR.write "\n [TOP1]\n" + STDERR.write "#{kbest[0].s}\n" + puts "#{kbest[0].s}" if iter+1==cfg[:iterate] + + feedback, func, output = exec kbest[0].s, gold[j] + top1_stats.update feedback, func, output + + + hope = fear = new_reference = nil + type1 = type2 = skip = false + case cfg[:variant] + when 'standard' + hope, fear, skip, type1, type2 = gethopefear_standard kbest, feedback + when 'rampion' + hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j] + when 'fear_no_exec_skip' + hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_skip kbest, feedback, gold[j] + when 'fear_no_exec' + hope, fear, skip, type1, type2 = gethopefear_fear_no_exec kbest, feedback, gold[j], cfg[:hope_fear_max] + when 'fear_no_exec_hope_exec' + hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec kbest, feedback, gold[j], cfg[:hope_fear_max] + when 'fear_no_exec_hope_exec_skip' + hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold[j], cfg[:hope_fear_max] + when 'only_exec' + hope, fear, skip, type1, type2, new_reference = gethopefear_only_exec kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j] + else + STDERR.write "NO SUCH VARIANT, exiting.\n" + exit 1 end - } - type2 = true - end - kbest.each_with_index { |k,i| - next if i==0||i==hope_idx - break if i==max - if !exec(k.s, gold, true)[0] - fear = k - break - end - } - skip = true if !hope||!fear - return hope, fear, skip, type1, type2, new_reference -end -def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil - hope = fear = nil; hope_idx = 0; new_reference = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - new_reference = hope - type1 = true - elsif own_reference - hope = own_reference - type1 = true - else - kbest.each_with_index { |k,i| - next if i==0 - break if i==max - if exec(k.s, gold, true)[0] - hope_idx = i - hope = k - break + if new_reference + own_references[j] = new_reference end - } - type2 = true - end - kbest.each_with_index { |k,i| - next if i==0||i==hope_idx - break if i==max - if !exec(k.s, gold, true)[0] - fear = k - break - end - } - skip = true if !hope||!fear - return hope, fear, skip, type1, type2, new_reference -end -def gethopefear_rampion kbest, reference - hope = fear = nil - type1 = type2 = false - if kbest[0].s == reference - hope = kbest[0] - fear = hope_and_fear(kbest, 'fear') - type1 = true - else - hope = hope_and_fear(kbest, 'hope') - fear = kbest[0] - type2 = true - end - return hope, fear, false, type1, type2 -end + type1_updates+=1 if type1 + type2_updates+=1 if type2 -def main - opts = Trollop::options do - # data - opt :k, "k", :type => :int, :default => 10000 - opt :hope_fear_max, "asdf", :type => :int, :default => 32, :short => '-q' - opt :input, "'foreign' input", :type => :string, :required => true - opt :references, "(parseable) references", :type => :string, :required => true - opt :gold, "gold output", :type => :string, :require => true - opt :gold_mrl, "gold parse", :type => :string, :short => '-h', :require => true - opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w' - opt :cdec_ini, "cdec config file", :type => :string, :default => './cdec.ini' - # output - opt :debug, "debug output", :type => :bool, :default => false - opt :output_weights, "output file for final weights", :type => :string, :required => true - opt :stop_after, "stop after x examples", :type => :int, :default => -1 - opt :print_kbests, "print full kbest lists", :type => :bool, :default => false, :short => '-l' - # important parameters - opt :eta, "learning rate", :type => :float, :default => 0.01 - opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j' - opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard' - # misc parameters - opt :scale_model, "scale model score by this factor", :type => :float, :default => 1.0, :short => '-m' - opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n' - opt :skip_on_no_proper_gold, "skip if the reference didn't produce a proper gold output", :default => false, :short => '-x' - opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y' - end - # output configuration - puts "cfg" - opts.each_pair {|k,v| puts "#{k}=#{v}"} - puts - # read files - input = File.readlines(opts[:input], :encoding=>'utf-8').map{|i|i.strip} - references = File.readlines(opts[:references], :encoding=>'utf-8').map{|i|i.strip} - gold = File.readlines(opts[:gold], :encoding=>'utf-8').map{|i|i.strip} - gold_mrl = File.readlines(opts[:gold_mrl], :encoding=>'utf-8').map{|i|i.strip} - stopwords = File.readlines('d/stopwords.en', :encoding=>'utf-8').map{|i|i.strip} - # only_exec: new refs - own_references = nil - own_references = references.map{|i|nil} if opts[:variant]== 'only_exec' - # init weights - w = NamedSparseVector.new - w.from_file opts[:init_weights] - last_wf = '' -# iterate -opts[:iterate].times { |iter| - # numerous counters - without_translations = 0 - no_proper_gold_output = 0 - count = 0 - top1_stats = Stats.new 'top1' - hope_stats = Stats.new 'hope' - fear_stats = Stats.new 'fear' - refs_stats = Stats.new 'refs' - type1_updates = 0 - type2_updates = 0 - top1_hit = 0 - top1_variant = 0 - top1_real_variant = 0 - hope_hit = 0 - hope_variant = 0 - hope_real_variant = 0 - kbest_sz = 0 - # for each example - input.each_with_index { |i,j| - count += 1 - # write current weights to file - tmp_file = Tempfile.new('rampion') - tmp_file_path = tmp_file.path - last_wf = tmp_file.path - tmp_file.write w.to_file - tmp_file.close - # get kbest list for current input - kbest = predict_translation i, opts[:k], opts[:cdec_ini], tmp_file_path - kbest_sz += kbest.size - # output - puts "EXAMPLE #{j}" - puts "GOLD MRL: #{gold_mrl[j]}" - puts "GOLD OUTPUT #{gold[j]}" - # skip if no translation could be produced - if kbest.size == 0 - without_translations += 1 - puts "NO MT OUTPUT, skipping example\n\n" - next - end - # no proper gold - if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].' - no_proper_gold_output += 1 - if opts[:skip_on_no_proper_gold] - puts "NO PROPER GOLD OUTPUT, skipping example\n\n" + ref_words = bag_of_words references[j], stopwords + + if kbest[0].s == references[j] + top1_hit += 1 + else + top1_variant += 1 + top1_true_variant += 1 if !bag_of_words(kbest[0].s, stopwords).is_subset_of?(ref_words) + end + if hope && hope.s==references[j] + hope_hit += 1 + elsif hope + hope_variant += 1 + hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words) + end + + STDERR.write "\n [HOPE]\n" + if hope + feedback, func, output = exec hope.s, gold[j] + hope_stats.update feedback, func, output + end + STDERR.write "\n [FEAR]\n" + if fear + feedback, func, output = exec fear.s, gold[j] + fear_stats.update feedback, func, output + end + STDERR.write "\n [REFERENCE]\n" + feedback, func, output = exec references[j], gold[j] + refs_stats.update feedback, func, output + + if skip || !hope || !fear + STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n" next end - end - # score kbest list - score_translations kbest, references[j] - # print kbest list - if opts[:print_kbests] - puts "<<<KBEST" - kbest.each_with_index { |k,l| - _print l, k.s, k.model, k.score - } - puts ">>>" - end - # adjust model scores to fit in [0,1] - adj_model kbest, opts[:scale_model] - # top1 - puts "---top1" - puts "TOP1 TRANSLATION: #{kbest[0].s}" if iter+1==opts[:iterate] - _print 0, kbest[0].s, kbest[0].model, kbest[0].score - feedback, func, output = exec kbest[0].s, gold[j] - top1_stats.update feedback, func, output - # reference as bag of words - ref_words = bag_of_words references[j], stopwords - # hope and fear - hope = fear = new_reference = nil - type1 = type2 = skip = false - if opts[:variant] == 'standard' - hope, fear, skip, type1, type2 = gethopefear_standard kbest, feedback - elsif opts[:variant] == 'rampion' - hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j] - elsif opts[:variant] == 'fear_no_exec_skip' - hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_skip kbest, feedback, gold[j] - elsif opts[:variant] == 'fear_no_exec' - hope, fear, skip, type1, type2 = gethopefear_fear_no_exec kbest, feedback, gold[j], opts[:hope_fear_max] - elsif opts[:variant] == 'fear_no_exec_hope_exec' - hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec kbest, feedback, gold[j], opts[:hope_fear_max] - elsif opts[:variant] == 'fear_no_exec_hope_exec_skip' - hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold[j], opts[:hope_fear_max] - elsif opts[:variant] == 'only_exec' - hope, fear, skip, type1, type2, new_reference = gethopefear_only_exec kbest, feedback, gold[j], opts[:hope_fear_max], own_references[j] - else - puts "no such hope/fear variant" - exit 1 - end - # new reference (only_exec) - if new_reference - own_references[j] = new_reference - end - # type1/type2 - type1_updates+=1 if type1 - type2_updates+=1 if type2 - # top1/hope hit - if kbest[0].s == references[j] - top1_hit += 1 + + w = update w, hope, fear, cfg[:eta] if !cfg[:no_update] + w.normalize! if cfg[:normalize] + + break if cfg[:stop_after]>0&&(j+1)==cfg[:stop_after] + } + + if cfg[:iterate] > 1 + WriteFile.new("#{cfg[:output_weights]}.#{iter}.gz").write(ReadFile.new(last_weights_fn).read) else - top1_variant += 1 - top1_real_variant += 1 if bag_of_words(kbest[0].s,stopwords)!=ref_words - end - if hope&&hope.s == references[j] - hope_hit += 1 - elsif hope - hope_variant += 1 - hope_real_variant += 1 if bag_of_words(hope.s,stopwords)!=ref_words + FileUtils::cp(last_weights_fn, cfg[:output_weights]) end - # output info for current example - puts "---hope" - if hope - _print hope.rank, hope.s, hope.model, hope.score - feedback, func, output = exec hope.s, gold[j] - hope_stats.update feedback, func, output - end - puts "---fear" - if fear - _print fear.rank, fear.s, fear.model, fear.score - feedback, func, output = exec fear.s, gold[j] - fear_stats.update feedback, func, output - end - puts "---reference" - _print 'x', references[j], 'x', 1.0 - feedback, func, output = exec references[j], gold[j] - refs_stats.update feedback, func, output - # skip example? - if skip||!hope||!fear - puts "NO GOOD FEAR/HOPE, skipping example\n\n" - next - end - puts - # update - w = update w, hope, fear, opts[:eta] if !opts[:no_update] - # normalize weight vector to length 1 - w.normalize! if opts[:normalize] - # stopx after x examples - break if opts[:stop_after]>0 && (j+1)==opts[:stop_after] + + STDERR.write <<-eos + +--- + iteration ##{iter+1}/#{cfg[:iterate]}: #{count} examples + type1 updates: #{type1_updates} + type2 updates: #{type2_updates} + top1 hits: #{top1_hit} + top1 variant: #{top1_variant} + top1 true variant: #{top1_true_variant} + hope hits: #{hope_hit} + hope variant: #{hope_variant} + hope true variant: #{hope_true_variant} + kbest size: #{(kbest_sz/count).round 2} + #{((without_translation.to_f/count)*100).round 2}% without translations (abs: #{without_translation}) + #{((no_proper_gold_output.to_f/count)*100).round 2}% no good gold output (abs: #{no_proper_gold_output}) + +#{top1_stats.to_s count} +#{hope_stats.to_s count} +#{fear_stats.to_s count} +#{refs_stats.to_s count} + +eos + } - # keep weight files for each iteration - if opts[:iterate] > 1 - FileUtils::cp(last_wf, "#{opts[:output_weights]}.#{iter}") - else - FileUtils::cp(last_wf, opts[:output_weights]) - end - # output stats - puts "iteration ##{iter+1}/#{opts[:iterate]}" - puts "#{count} examples" - puts " type1 updates: #{type1_updates}" - puts " type2 updates: #{type2_updates}" - puts " top1 hits: #{top1_hit}" - puts " top1 variant: #{top1_variant}" - puts "top1 real variant: #{top1_real_variant}" - puts " hope hits: #{hope_hit}" - puts " hope variant: #{hope_variant}" - puts "hope real variant: #{hope_real_variant}" - puts " kbest size: #{(kbest_sz/count).round 2}" - puts "#{((without_translations.to_f/count)*100).round 2}% without translations (abs: #{without_translations})" - puts "#{((no_proper_gold_output.to_f/count)*100).round 2}% no good gold output (abs: #{no_proper_gold_output})" - puts top1_stats.print count - puts hope_stats.print count - puts fear_stats.print count - puts refs_stats.print count -} end |