finished refactoring

author: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-02-13 11:21:34 +0100
committer: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-02-13 11:21:34 +0100
commit: cbafa90cb1a6b363b797c0f889c1c35749dee874 (patch)
tree: 5aad78c4629654a375c021d85689b6524da96525
parent: aefd923601d6457103069ebda91abc4caae297f8 (diff)
2 files changed, 414 insertions, 598 deletions
diff --git a/hopefear.rb b/hopefear.rb
new file mode 100644
index 0000000..0423d26
--- /dev/null
+++ b/hopefear.rb
@@ -0,0 +1,193 @@
+def hope_and_fear kbest, action
+  max = -1.0/0
+  max_idx = -1
+  kbest.each_with_index { |i,j|
+    if action=='hope' && i.score + i.other_score > max
+      max_idx = j; max = i.score + i.other_score
+    end
+    if action=='fear' && i.score - i.other_score > max
+      max_idx = j; max = i.score - i.other_score
+    end
+  }
+  return kbest[max_idx]
+end
+
+def gethopefear_standard kbest, feedback
+  hope = fear = nil
+  type1 = type2 = false
+  if feedback == true
+    hope = kbest[0]
+    type1 = true
+  else
+    hope = hope_and_fear(kbest, 'hope')
+    type2 = true
+  end
+  fear = hope_and_fear(kbest, 'fear')
+  return hope, fear, false, type1, type2
+end
+
+def gethopefear_fear_no_exec kbest, feedback, gold, max
+  hope = fear = nil
+  type1 = type2 = false
+  if feedback == true
+    hope = kbest[0]
+    type1 = true
+  else
+    hope = hope_and_fear(kbest, 'hope')
+    type2 = true
+  end
+  kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}.each_with_index { |k,i|
+    break if i==max
+    if !exec(k.s, gold, true)[0]
+       fear = k
+       break
+    end
+  }
+  skip=true if !fear
+  return hope, fear, skip, type1, type2
+end
+
+def gethopefear_fear_no_exec_skip kbest, feedback, gold
+  hope = fear = nil
+  type1 = type2 = false
+  if feedback == true
+    hope = kbest[0]
+    type1 = true
+  else
+    hope = hope_and_fear(kbest, 'hope')
+    type2 = true
+  end
+  fear = hope_and_fear(kbest, 'fear')
+  skip = exec(fear.s, gold, true)[0]
+  return hope, fear, skip, type1, type2
+end
+
+def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
+  hope = fear = nil; hope_idx = 0
+  type1 = type2 = false
+  sorted_kbest = kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}
+  if feedback == true
+    hope = kbest[0]
+    type1 = true
+  else
+    sorted_kbest.each_with_index { |k,i|
+      next if i==0
+      break if i==max
+      if exec(k.s, gold, true)[0]
+        hope_idx = i
+        hope = k
+        break
+      end
+    }
+    type2 = true
+  end
+  sorted_kbest.each_with_index { |k,i|
+    break if i>(kbest.size-(hope_idx+1))||i==max
+    if !exec(k.s, gold, true)[0]
+      fear = k
+      break
+    end
+  }
+  skip = true if !hope||!fear
+  return hope, fear, skip, type1, type2
+end
+
+def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max
+  hope = fear = nil
+  type1 = type2 = false
+  if feedback == true
+    hope = kbest[0]
+    type1 = true
+  else
+    hope = hope_and_fear(kbest, 'hope')
+    type2 = true
+  end
+  fear = hope_and_fear(kbest, 'fear')
+  skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0]
+  return hope, fear, skip, type1, type2
+end
+
+
+def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
+  hope = fear = nil; hope_idx = 0; new_reference = nil
+  type1 = type2 = false
+  if feedback == true
+    hope = kbest[0]
+    new_reference = hope
+    type1 = true
+  elsif own_reference
+    hope = own_reference
+    type1 = true
+  else
+    kbest.each_with_index { |k,i|
+      next if i==0
+      break if i==max
+      if exec(k.s, gold, true)[0]
+        hope_idx = i
+        hope = k
+        break
+      end
+    }
+    type2 = true
+  end
+  kbest.each_with_index { |k,i|
+    next if i==0||i==hope_idx
+    break if i==max
+    if !exec(k.s, gold, true)[0]
+      fear = k
+      break
+    end
+  }
+  skip = true if !hope||!fear
+  return hope, fear, skip, type1, type2, new_reference
+end
+
+def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil
+  hope = fear = nil; hope_idx = 0; new_reference = nil
+  type1 = type2 = false
+  if feedback == true
+    hope = kbest[0]
+    new_reference = hope
+    type1 = true
+  elsif own_reference
+    hope = own_reference
+    type1 = true
+  else
+    kbest.each_with_index { |k,i|
+      next if i==0
+      break if i==max
+      if exec(k.s, gold, true)[0]
+        hope_idx = i
+        hope = k
+        break
+      end
+    }
+    type2 = true
+  end
+  kbest.each_with_index { |k,i|
+    next if i==0||i==hope_idx
+    break if i==max
+    if !exec(k.s, gold, true)[0]
+      fear = k
+      break
+    end
+  }
+  skip = true if !hope||!fear
+  return hope, fear, skip, type1, type2, new_reference
+end
+
+def gethopefear_rampion kbest, reference
+  hope = fear = nil
+  type1 = type2 = false
+  if kbest[0].s == reference
+    hope = kbest[0]
+    fear = hope_and_fear(kbest, 'fear')
+    type1 = true
+  else
+    hope = hope_and_fear(kbest, 'hope')
+    fear = kbest[0]
+    type2 = true
+  end
+  return hope, fear, false, type1, type2
+end
+
diff --git a/rampfion.rb b/rampfion.rb
index 24a6497..3ff216e 100755
--- a/rampfion.rb
+++ b/rampfion.rb
@@ -1,35 +1,16 @@
 #!/usr/bin/env ruby
 
+require 'nlp_ruby'
 require 'trollop'
 require 'tempfile'
-require 'open3'
 require 'memcached'
-require 'timeout'
+require_relative './hopefear'
 
 
 SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset 2>/dev/null'
 EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'
-CDEC = "/toolbox/cdec-dtrain/bin/cdec"
-
 $cache = Memcached.new("localhost:11211")
 
-# the semantic parser hangs sometimes
-def spawn_with_timeout cmd, t=4, debug=false
-  puts cmd if debug
-  pipe_in, pipe_out = IO.pipe
-  pid = Process.spawn(cmd, :out => pipe_out)
-  begin
-    Timeout.timeout(t) { Process.wait pid }
-  rescue Timeout::Error
-    return ""
-    # accept the zombies
-    #Process.kill('TERM', pid)
-  end
-  pipe_out.close
-  return pipe_in.read
-end
-
-# execute
 def exec natural_language_string, reference_output, no_output=false
   func = nil
   output = nil
@@ -55,621 +36,263 @@ def exec natural_language_string, reference_output, no_output=false
       $cache.delete key_prefix+"__FEEDBACK"
     end
   end
-  puts "        nrl: #{natural_language_string}" if !no_output
-  puts "        mrl: #{func}" if !no_output
-  puts "     output: #{output}" if !no_output
-  puts "   correct?: #{feedback}" if !no_output
+  STDERR.write "        nrl: #{natural_language_string}\n" if !no_output
+  STDERR.write "        mrl: #{func}\n" if !no_output
+  STDERR.write "     output: #{output}\n" if !no_output
+  STDERR.write "   correct?: #{feedback}\n" if !no_output
   return feedback, func, output
 end
 
-# decoder interaction/translations
-class Translation
-  attr_accessor :s, :f, :rank, :model, :score
-
-  def initialize kbest_line, rank=-1
-    a = kbest_line.split ' ||| '
-    @s = a[1].strip
-    h = {}
-    a[2].split.each { |i|
-      name, value = i.split '='
-      value = value.to_f
-      h[name] = value
-    }
-    @f = NamedSparseVector.new h
-    @rank = rank
-    @model = a[3].to_f
-    @score = -1.0
-  end
-
-  def to_s
-    "#{@rank} ||| #{@s} ||| #{@model} ||| #{@score} ||| #{@f.to_s}"
-  end
-end
-
-def predict_translation s, k, ini, w
-  o, s = Open3.capture2 "echo \"#{s}\" | #{CDEC} -c #{ini} -r -k #{k} -w #{w} 2>/dev/null"
-  j = -1
-  return o.split("\n").map{|i| j+=1; Translation.new(i, j)}
-end
-
-# scoring (per-sentence BLEU)
-def ngrams_it(s, n, fix=false)
-  a = s.strip.split
-  a.each_with_index { |tok, i|
-    tok.strip!
-    0.upto([n-1, a.size-i-1].min) { |m|
-      yield a[i..i+m] if !(fix||(a[i..i+m].size>n))
-    }
-  }
-end
-
-def brevity_penalty hypothesis, reference
-  a = hypothesis.split; b = reference.split
-  return 1.0 if a.size>b.size
-  return Math.exp(1.0 - b.size.to_f/a.size);
-end
-
-def per_sentence_bleu hypothesis, reference, n=4
-  h_ng = {}; r_ng = {}
-  (1).upto(n) {|i| h_ng[i] = []; r_ng[i] = []}
-  ngrams_it(hypothesis, n) {|i| h_ng[i.size] << i}
-  ngrams_it(reference, n) {|i| r_ng[i.size] << i}
-  m = [n, reference.split.size].min
-  weight = 1.0/m
-  add = 0.0
-  sum = 0
-  (1).upto(m) { |i|
-    counts_clipped = 0
-    counts_sum = h_ng[i].size
-    h_ng[i].uniq.each {|j| counts_clipped += r_ng[i].count(j)}
-    add = 1.0 if i >= 2
-    sum += weight * Math.log((counts_clipped + add)/(counts_sum + add));
-  }
-  return brevity_penalty(hypothesis, reference) * Math.exp(sum)
-end
-
-def score_translations list_of_translations, reference
-  list_of_translations.each { |i| i.score = per_sentence_bleu i.s, reference}
-end
-
-# hope and fear
-def hope_and_fear kbest, action
-  max = -1.0/0
-  max_idx = -1
-  kbest.each_with_index { |i,j|
-  if action=='hope' && i.model + i.score > max
-    max_idx = j; max = i.model + i.score
-  end
-  if action=='fear' && i.model - i.score > max
-    max_idx = j; max = i.model - i.score
-  end
-  }
-  return kbest[max_idx]
-end
-
-# update
-def update w, hope, fear, eta
-  diff = hope.f - fear.f
-  diff *= eta
-  w += diff
-  return w
-end
-
-# weights
-class NamedSparseVector
-  attr_accessor :h
-
-  def initialize init=nil
-    @h = {}
-    @h = init if init
-    @h.default = 0.0
-  end
-
-  def + other
-    new_h = Hash.new
-    new_h.update @h
-    ret = NamedSparseVector.new new_h
-    other.each_pair { |k,v| ret[k]+=v }
-    return ret
-  end
-
-  def from_file fn
-    f = File.new(fn, 'r')
-    while line = f.gets
-      name, value = line.strip.split
-      value = value.to_f
-      @h[name] = value
-    end
-  end
-
-  def to_file
-    s = []
-    @h.each_pair { |k,v| s << "#{k} #{v}" }
-    s.join("\n")+"\n"
-  end
-
-  def - other
-    new_h = Hash.new
-    new_h.update @h
-    ret = NamedSparseVector.new new_h
-    other.each_pair { |k,v| ret[k]-=v }
-    return ret
-  end
-
-  def * scalar
-    raise ArgumentError, "Arg is not numeric #{scalar}" unless scalar.is_a? Numeric
-    ret = NamedSparseVector.new
-    @h.keys.each { |k| ret[k] = @h[k]*scalar }
-    return ret
-  end
-
-  def dot other
-    sum = 0.0
-    @h.each_pair { |k,v|
-      sum += v * other[k]
-    }
-    return sum
-  end
-
-  def [] k
-    @h[k]
-  end
-
-  def []= k, v
-    @h[k] = v
-  end
-
-  def each_pair
-    @h.each_pair { |k,v| yield k,v }
-  end
-
-  def to_s
-    @h.to_s
-  end
-
-  def length
-    Math.sqrt(@h.values.map{|i|i*i}.inject(:+))
-  end
-
-  def normalize!
-    l = length
-    @h.each_pair { |k,v|
-      @h[k] = v/l
-    }
-  end
-
-  def size
-    @h.keys.size
-  end
-end
-
-# map models score to [0,1]
-def adj_model kbest, factor
-  min = kbest.map{|i|i.model}.min
-  max = kbest.map{|i|i.model}.max
-  kbest.each {|i| i.model = factor*((i.model-min)/(max-min))}
-end
-
 class Stats
+
   def initialize name
     @name = name
     @with_parse = 0.0
-    @with_output       = 0.0
-    @correct_output    = 0.0
+    @with_output = 0.0
+    @with_correct_output = 0.0
   end
 
+  # FIXME
   def update feedback, func, output
-    @with_parse +=1 if func!="None"&&func!=''
-    @with_output +=1 if output!="null"&&output!=''
-    @correct_output += 1 if feedback==true
+    @with_parse +=1 if func!='None'&&func!=''
+    @with_output +=1 if output!='null'&&output!=''
+    @with_correct_output += 1 if feedback==true
   end
 
-  def print total
+  def to_s total
     without_parse = total-@with_parse
 <<-eos
-  [#{@name}]
-         #{@name} with parse #{((@with_parse/total)*100).round 2}  abs:#{@with_parse}
-        #{@name} with output #{((@with_output/total)*100).round 2} abs:#{@with_output}
-#{@name} with correct output #{((@correct_output/total)*100).round 2} adj:#{((@correct_output/(total-without_parse))*100).round 2} abs:#{@correct_output}
+         #{@name} with parse #{((@with_parse/total)*100).round 2}% abs=#{@with_parse}
+        #{@name} with output #{((@with_output/total)*100).round 2}% abs=#{@with_output}
+#{@name} with correct output #{((@with_correct_output/total)*100).round 2}% adj=#{((@with_correct_output/(total-without_parse))*100).round 2} abs=#{@with_correct_output}
 eos
   end
 end
 
-def _print rank, string, model, score
-    puts "rank=#{rank} string='#{string}' model=#{model} score=#{score}"
+# map model scores to lie within [0,1]
+def adjust_model_scores kbest, factor
+  min = kbest.map{ |k| k.score }.min
+  max = kbest.map{ |k| k.score }.max
+  kbest.each { |k| k.score = factor*((k.score-min)/(max-min)) }
 end
 
-def bag_of_words s, stopwords=[]
-  s.split.uniq.sort.reject{|v| stopwords.include? v}
-end
-
-def gethopefear_standard kbest, feedback
-  hope = fear = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    type1 = true
-  else
-    hope = hope_and_fear(kbest, 'hope')
-    type2 = true
-  end
-  fear = hope_and_fear(kbest, 'fear')
-  return hope, fear, false, type1, type2
+def update model, hope, fear, eta
+  diff = hope.f - fear.f
+  diff *= eta
+  model += diff
+  return model
 end
 
-def gethopefear_fear_no_exec kbest, feedback, gold, max
-  hope = fear = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    type1 = true
-  else
-    hope = hope_and_fear(kbest, 'hope')
-    type2 = true
-  end
-  kbest.sort{|x,y|(y.model+y.score)<=>(x.model+x.score)}.each_with_index { |k,i|
-    break if i==max
-    if !exec(k.s, gold, true)[0]
-       fear = k
-       break
-    end
-  }
-  skip=true if !fear
-  return hope, fear, skip, type1, type2
-end
+def main
+  cfg = Trollop::options do
+    # data
+    opt :k,             "k",                      :type => :int,    :default => 10000, :short => '-k'
+    opt :input,         "'foreign' input",        :type => :string, :required => true, :short => '-i'
+    opt :references,    "(parseable) references", :type => :string, :required => true, :short => '-r'
+    opt :gold,          "gold output",            :type => :string, :required => true, :short => '-g'
+    opt :gold_mrl,      "gold parse",             :type => :string, :required => true, :short => '-h'
+    opt :init_weights,  "initial weights",        :type => :string, :required => true, :short => '-w'
+    opt :cdec_ini,      "cdec config file",       :type => :string, :required => true, :short => '-c'
+    # output
+    opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o'
+    opt :debug,          "debug output",                  :type => :bool,   :default => false, :short => '-d'
+    opt :print_kbest,    "print full kbest lists",        :type => :bool,   :default => false, :short => '-l'
+    # learning parameters
+    opt :eta,                    "learning rate",                                              :type => :float, :default => 0.01,  :short => '-e'
+    opt :iterate,                "iteration X epochs",                                         :type => :int,   :default => 1,     :short => '-j'
+    opt :stop_after,             "stop after x examples",                                      :type => :int,   :default => -1,    :short => '-s'
+    opt :scale_model,            "scale model scores by this factor",                          :type => :float, :default => 1.0,   :short => '-m'
+    opt :normalize,              "normalize weights after each update",                        :type => :bool,  :default => false, :short => '-n'
+    opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool,  :default => false, :short => '-x'
+    opt :no_update,              "don't update weights",                                       :type => :bool,  :default => false, :short => '-y'
+    opt :hope_fear_max,          "FIXME",                                                      :type => :int,   :default => 32,    :short => '-q'
+    opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v'
+  end
+
+  STDERR.write "CONFIGURATION\n"
+  cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }
+
+  input      = ReadFile.new(cfg[:input]).readlines_strip
+  references = ReadFile.new(cfg[:references]).readlines_strip
+  gold       = ReadFile.new(cfg[:gold]).readlines_strip
+  gold_mrl   = ReadFile.new(cfg[:gold_mrl]).readlines_strip # FIXME => prolog!
+  stopwords  = ReadFile.new('prototype/d/stopwords.en').readlines_strip
 
-def gethopefear_fear_no_exec_skip kbest, feedback, gold
-  hope = fear = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    type1 = true
-  else
-    hope = hope_and_fear(kbest, 'hope')
-    type2 = true
-  end
-  fear = hope_and_fear(kbest, 'fear')
-  skip = exec(fear.s, gold, true)[0]
-  return hope, fear, skip, type1, type2
-end
+  own_references = nil
+  own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec'
+
+  w = SparseVector.new
+  w.from_kv_file cfg[:init_weights]
+  last_weights_fn = ''
+
+  cfg[:iterate].times { |iter|
+
+    # numerous counters
+    count                 = 0
+    without_translation   = 0
+    no_proper_gold_output = 0
+    top1_stats = Stats.new 'top1'
+    hope_stats = Stats.new 'hope'
+    fear_stats = Stats.new 'fear'
+    refs_stats = Stats.new 'refs'
+    type1_updates     = 0
+    type2_updates     = 0
+    top1_hit          = 0
+    top1_variant      = 0
+    top1_true_variant = 0
+    hope_hit          = 0
+    hope_variant      = 0
+    hope_true_variant = 0
+    kbest_sz          = 0
+
+    input.each_with_index { |i,j|
+      count += 1
+
+      tmp_file        = Tempfile.new('rampion')
+      tmp_file_path   = tmp_file.path
+      last_weights_fn = tmp_file.path
+      tmp_file.write w.to_kv ' '
+      tmp_file.close
+
+      kbest = CDEC::kbest i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
+      kbest_sz += kbest.size
+
+      STDERR.write "\n=================\n"
+      STDERR.write "    EXAMPLE: #{j}\n"
+      STDERR.write "   GOLD MRL: #{gold_mrl[j]}\n"
+      STDERR.write "GOLD OUTPUT: #{gold[j]}\n"
+
+      if kbest.size == 0
+        without_translation += 1
+        STDERR.write "NO MT OUTPUT, skipping example\n"
+        next
+      end
 
-def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
-  hope = fear = nil; hope_idx = 0
-  type1 = type2 = false
-  sorted_kbest = kbest.sort{|x,y|(y.model+y.score)<=>(x.model+x.score)}
-  if feedback == true
-    hope = kbest[0]
-    type1 = true
-  else
-    sorted_kbest.each_with_index { |k,i|
-      next if i==0
-      break if i==max
-      if exec(k.s, gold, true)[0]
-        hope_idx = i
-        hope = k
-        break
+      if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].'
+        no_proper_gold_output += 1
+        if cfg[:skip_on_no_proper_gold]
+          STDERR.write "NO PROPER GOLD OUTPUT, skipping example\n"
+          next
+        end
       end
-    }
-    type2 = true
-  end
-  sorted_kbest.each_with_index { |k,i|
-    break if i>(kbest.size-(hope_idx+1))||i==max
-    if !exec(k.s, gold, true)[0]
-      fear = k
-      break
-    end
-  }
-  skip = true if !hope||!fear
-  return hope, fear, skip, type1, type2
-end
 
-def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max
-  hope = fear = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    type1 = true
-  else
-    hope = hope_and_fear(kbest, 'hope')
-    type2 = true
-  end
-  fear = hope_and_fear(kbest, 'fear')
-  skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0]
-  return hope, fear, skip, type1, type2
-end
+      kbest.each { |k| k.other_score = BLEU::per_sentence_bleu k.s, references[j] }
 
+      if cfg[:print_kbest]
+        STDERR.write "\n<<< KBEST\n"
+        kbest.each_with_index { |k,l| STDERR.write k.to_s+"\n" }
+        STDERR.write ">>>\n"
+      end
 
-def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
-  hope = fear = nil; hope_idx = 0; new_reference = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    new_reference = hope
-    type1 = true
-  elsif own_reference
-    hope = own_reference
-    type1 = true
-  else
-    kbest.each_with_index { |k,i|
-      next if i==0
-      break if i==max
-      if exec(k.s, gold, true)[0]
-        hope_idx = i
-        hope = k
-        break
+      adjust_model_scores kbest, cfg[:scale_model]
+
+      STDERR.write "\n [TOP1]\n"
+      STDERR.write "#{kbest[0].s}\n"
+      puts "#{kbest[0].s}" if iter+1==cfg[:iterate]
+
+      feedback, func, output = exec kbest[0].s, gold[j]
+      top1_stats.update feedback, func, output
+
+
+      hope = fear = new_reference = nil
+      type1 = type2 = skip = false
+      case cfg[:variant]
+      when 'standard'
+        hope, fear, skip, type1, type2 = gethopefear_standard kbest, feedback
+      when 'rampion'
+        hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j]
+      when 'fear_no_exec_skip'
+        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_skip kbest, feedback, gold[j]
+      when 'fear_no_exec'
+        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec kbest, feedback, gold[j], cfg[:hope_fear_max]
+      when 'fear_no_exec_hope_exec'
+        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec kbest, feedback, gold[j], cfg[:hope_fear_max]
+      when 'fear_no_exec_hope_exec_skip'
+        hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold[j], cfg[:hope_fear_max]
+      when 'only_exec'
+        hope, fear, skip, type1, type2, new_reference = gethopefear_only_exec kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j]
+      else
+        STDERR.write "NO SUCH VARIANT, exiting.\n"
+        exit 1
       end
-    }
-    type2 = true
-  end
-  kbest.each_with_index { |k,i|
-    next if i==0||i==hope_idx
-    break if i==max
-    if !exec(k.s, gold, true)[0]
-      fear = k
-      break
-    end
-  }
-  skip = true if !hope||!fear
-  return hope, fear, skip, type1, type2, new_reference
-end
 
-def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil
-  hope = fear = nil; hope_idx = 0; new_reference = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    new_reference = hope
-    type1 = true
-  elsif own_reference
-    hope = own_reference
-    type1 = true
-  else
-    kbest.each_with_index { |k,i|
-      next if i==0
-      break if i==max
-      if exec(k.s, gold, true)[0]
-        hope_idx = i
-        hope = k
-        break
+      if new_reference
+        own_references[j] = new_reference
       end
-    }
-    type2 = true
-  end
-  kbest.each_with_index { |k,i|
-    next if i==0||i==hope_idx
-    break if i==max
-    if !exec(k.s, gold, true)[0]
-      fear = k
-      break
-    end
-  }
-  skip = true if !hope||!fear
-  return hope, fear, skip, type1, type2, new_reference
-end
 
-def gethopefear_rampion kbest, reference
-  hope = fear = nil
-  type1 = type2 = false
-  if kbest[0].s == reference
-    hope = kbest[0]
-    fear = hope_and_fear(kbest, 'fear')
-    type1 = true
-  else
-    hope = hope_and_fear(kbest, 'hope')
-    fear = kbest[0]
-    type2 = true
-  end
-  return hope, fear, false, type1, type2
-end
+      type1_updates+=1 if type1
+      type2_updates+=1 if type2
 
-def main
-  opts = Trollop::options do
-    # data
-    opt :k, "k", :type => :int, :default => 10000
-    opt :hope_fear_max, "asdf",  :type => :int, :default => 32, :short => '-q'
-    opt :input, "'foreign' input", :type => :string, :required => true
-    opt :references, "(parseable) references", :type => :string, :required => true
-    opt :gold, "gold output", :type => :string, :require => true
-    opt :gold_mrl, "gold parse", :type => :string, :short => '-h', :require => true
-    opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w'
-    opt :cdec_ini, "cdec config file", :type => :string, :default => './cdec.ini'
-    # output
-    opt :debug, "debug output", :type => :bool, :default => false
-    opt :output_weights, "output file for final weights", :type => :string, :required => true
-    opt :stop_after, "stop after x examples", :type => :int, :default => -1
-    opt :print_kbests, "print full kbest lists", :type => :bool, :default => false, :short => '-l'
-    # important parameters
-    opt :eta, "learning rate", :type => :float, :default => 0.01
-    opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j'
-    opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard'
-    # misc parameters
-    opt :scale_model, "scale model score by this factor", :type => :float, :default => 1.0, :short => '-m'
-    opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n'
-    opt :skip_on_no_proper_gold, "skip if the reference didn't produce a proper gold output", :default => false, :short => '-x'
-    opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y'
-  end
-  # output configuration
-  puts "cfg"
-  opts.each_pair {|k,v| puts "#{k}=#{v}"}
-  puts
-  # read files
-  input      = File.readlines(opts[:input], :encoding=>'utf-8').map{|i|i.strip}
-  references = File.readlines(opts[:references], :encoding=>'utf-8').map{|i|i.strip}
-  gold       = File.readlines(opts[:gold], :encoding=>'utf-8').map{|i|i.strip}
-  gold_mrl   = File.readlines(opts[:gold_mrl], :encoding=>'utf-8').map{|i|i.strip}
-  stopwords  = File.readlines('d/stopwords.en', :encoding=>'utf-8').map{|i|i.strip}
-  # only_exec: new refs
-  own_references = nil
-  own_references = references.map{|i|nil} if opts[:variant]== 'only_exec'
-  # init weights
-  w = NamedSparseVector.new
-  w.from_file opts[:init_weights]
-  last_wf = ''
-# iterate
-opts[:iterate].times { |iter|
-  # numerous counters
-  without_translations  = 0
-  no_proper_gold_output = 0
-  count                 = 0
-  top1_stats = Stats.new 'top1'
-  hope_stats = Stats.new 'hope'
-  fear_stats = Stats.new 'fear'
-  refs_stats = Stats.new 'refs'
-  type1_updates     = 0
-  type2_updates     = 0
-  top1_hit          = 0
-  top1_variant      = 0
-  top1_real_variant = 0
-  hope_hit          = 0
-  hope_variant      = 0
-  hope_real_variant = 0
-  kbest_sz          = 0
-  # for each example
-  input.each_with_index { |i,j|
-    count += 1
-    # write current weights to file
-    tmp_file = Tempfile.new('rampion')
-    tmp_file_path = tmp_file.path
-    last_wf = tmp_file.path
-    tmp_file.write w.to_file
-    tmp_file.close
-    # get kbest list for current input
-    kbest = predict_translation i, opts[:k], opts[:cdec_ini], tmp_file_path
-    kbest_sz += kbest.size
-    # output
-    puts "EXAMPLE #{j}"
-    puts "GOLD MRL: #{gold_mrl[j]}"
-    puts "GOLD OUTPUT #{gold[j]}"
-    # skip if no translation could be produced
-    if kbest.size == 0
-      without_translations += 1
-      puts "NO MT OUTPUT, skipping example\n\n"
-      next
-    end
-    # no  proper gold
-    if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].'
-      no_proper_gold_output += 1
-      if opts[:skip_on_no_proper_gold]
-        puts "NO PROPER GOLD OUTPUT, skipping example\n\n"
+      ref_words = bag_of_words references[j], stopwords
+
+      if kbest[0].s == references[j]
+        top1_hit += 1
+      else
+        top1_variant += 1
+        top1_true_variant += 1 if !bag_of_words(kbest[0].s, stopwords).is_subset_of?(ref_words)
+      end
+      if hope && hope.s==references[j]
+        hope_hit += 1
+      elsif hope
+        hope_variant += 1
+        hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words)
+      end
+
+      STDERR.write "\n [HOPE]\n"
+      if hope
+        feedback, func, output =  exec hope.s, gold[j]
+        hope_stats.update feedback, func, output
+      end
+      STDERR.write "\n [FEAR]\n"
+      if fear
+        feedback, func, output = exec fear.s, gold[j]
+        fear_stats.update  feedback, func, output
+      end
+      STDERR.write "\n [REFERENCE]\n"
+      feedback, func, output = exec references[j], gold[j]
+      refs_stats.update feedback, func, output
+
+      if skip || !hope || !fear
+        STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n"
         next
       end
-    end
-    # score kbest list
-    score_translations kbest, references[j]
-    # print kbest list
-    if opts[:print_kbests]
-      puts "<<<KBEST"
-      kbest.each_with_index { |k,l|
-        _print l, k.s, k.model, k.score
-      }
-      puts ">>>"
-    end
-    # adjust model scores to fit in [0,1]
-    adj_model kbest, opts[:scale_model]
-    # top1
-    puts "---top1"
-    puts "TOP1 TRANSLATION: #{kbest[0].s}" if iter+1==opts[:iterate]
-    _print 0, kbest[0].s, kbest[0].model, kbest[0].score
-    feedback, func, output = exec kbest[0].s, gold[j]
-    top1_stats.update feedback, func, output
-    # reference as bag of words
-    ref_words = bag_of_words references[j], stopwords
-    # hope and fear
-    hope = fear = new_reference = nil
-    type1 = type2 = skip = false
-    if    opts[:variant] == 'standard'
-      hope, fear, skip, type1, type2 = gethopefear_standard kbest, feedback
-    elsif opts[:variant] == 'rampion'
-      hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j]
-    elsif opts[:variant] == 'fear_no_exec_skip'
-      hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_skip kbest, feedback, gold[j]
-    elsif opts[:variant] == 'fear_no_exec'
-      hope, fear, skip, type1, type2 = gethopefear_fear_no_exec kbest, feedback, gold[j], opts[:hope_fear_max]
-    elsif opts[:variant] == 'fear_no_exec_hope_exec'
-      hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec kbest, feedback, gold[j], opts[:hope_fear_max]
-    elsif opts[:variant] == 'fear_no_exec_hope_exec_skip'
-      hope, fear, skip, type1, type2 = gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold[j], opts[:hope_fear_max]
-    elsif opts[:variant] == 'only_exec'
-      hope, fear, skip, type1, type2, new_reference = gethopefear_only_exec kbest, feedback, gold[j], opts[:hope_fear_max], own_references[j]
-    else
-      puts "no such hope/fear variant"
-      exit 1
-    end
-    # new reference (only_exec)
-    if new_reference
-      own_references[j] = new_reference
-    end
-    # type1/type2
-    type1_updates+=1 if type1
-    type2_updates+=1 if type2
-    # top1/hope hit
-    if kbest[0].s == references[j]
-      top1_hit += 1
+
+      w = update w, hope, fear, cfg[:eta] if !cfg[:no_update]
+      w.normalize! if cfg[:normalize]
+
+      break if cfg[:stop_after]>0&&(j+1)==cfg[:stop_after]
+    }
+
+    if cfg[:iterate] > 1
+      WriteFile.new("#{cfg[:output_weights]}.#{iter}.gz").write(ReadFile.new(last_weights_fn).read)
     else
-      top1_variant += 1
-      top1_real_variant += 1 if bag_of_words(kbest[0].s,stopwords)!=ref_words
-    end
-    if hope&&hope.s == references[j]
-      hope_hit += 1
-    elsif hope
-      hope_variant += 1
-      hope_real_variant += 1 if bag_of_words(hope.s,stopwords)!=ref_words
+      FileUtils::cp(last_weights_fn, cfg[:output_weights])
     end
-    # output info for current example
-    puts "---hope"
-    if hope
-      _print hope.rank, hope.s, hope.model, hope.score
-      feedback, func, output =  exec hope.s, gold[j]
-      hope_stats.update feedback, func, output
-    end
-    puts "---fear"
-    if fear
-      _print fear.rank, fear.s, fear.model, fear.score
-      feedback, func, output = exec fear.s, gold[j]
-      fear_stats.update  feedback, func, output
-    end
-    puts "---reference"
-    _print 'x', references[j], 'x', 1.0
-    feedback, func, output = exec references[j], gold[j]
-    refs_stats.update feedback, func, output
-    # skip example?
-    if skip||!hope||!fear
-      puts "NO GOOD FEAR/HOPE, skipping example\n\n"
-      next
-    end
-    puts
-    # update
-    w = update w, hope, fear, opts[:eta] if !opts[:no_update]
-    # normalize weight vector to length 1
-    w.normalize! if opts[:normalize]
-    # stopx after x examples
-    break if opts[:stop_after]>0 && (j+1)==opts[:stop_after]
+
+    STDERR.write  <<-eos
+
+---
+  iteration ##{iter+1}/#{cfg[:iterate]}: #{count} examples
+        type1 updates: #{type1_updates}
+        type2 updates: #{type2_updates}
+            top1 hits: #{top1_hit}
+         top1 variant: #{top1_variant}
+    top1 true variant: #{top1_true_variant}
+            hope hits: #{hope_hit}
+         hope variant: #{hope_variant}
+    hope true variant: #{hope_true_variant}
+           kbest size: #{(kbest_sz/count).round 2}
+    #{((without_translation.to_f/count)*100).round 2}% without translations (abs: #{without_translation})
+    #{((no_proper_gold_output.to_f/count)*100).round 2}% no good gold output (abs: #{no_proper_gold_output})
+
+#{top1_stats.to_s count}
+#{hope_stats.to_s count}
+#{fear_stats.to_s count}
+#{refs_stats.to_s count}
+
+eos
+
   }
-  # keep weight files for each iteration
-  if opts[:iterate] > 1
-    FileUtils::cp(last_wf, "#{opts[:output_weights]}.#{iter}")
-  else
-    FileUtils::cp(last_wf, opts[:output_weights])
-  end
-  # output stats
-  puts "iteration ##{iter+1}/#{opts[:iterate]}"
-  puts "#{count} examples"
-  puts "    type1 updates: #{type1_updates}"
-  puts "    type2 updates: #{type2_updates}"
-  puts "        top1 hits: #{top1_hit}"
-  puts "     top1 variant: #{top1_variant}"
-  puts "top1 real variant: #{top1_real_variant}"
-  puts "        hope hits: #{hope_hit}"
-  puts "     hope variant: #{hope_variant}"
-  puts "hope real variant: #{hope_real_variant}"
-  puts "       kbest size: #{(kbest_sz/count).round 2}"
-  puts "#{((without_translations.to_f/count)*100).round 2}% without translations (abs: #{without_translations})"
-  puts "#{((no_proper_gold_output.to_f/count)*100).round 2}% no good gold output (abs: #{no_proper_gold_output})"
-  puts top1_stats.print count
-  puts hope_stats.print count
-  puts fear_stats.print count
-  puts refs_stats.print count
-}
 end
author	Patrick Simianer <simianer@cl.uni-heidelberg.de>	2014-02-13 11:21:34 +0100
committer	Patrick Simianer <simianer@cl.uni-heidelberg.de>	2014-02-13 11:21:34 +0100
commit	cbafa90cb1a6b363b797c0f889c1c35749dee874 (patch)
tree	5aad78c4629654a375c021d85689b6524da96525
parent	aefd923601d6457103069ebda91abc4caae297f8 (diff)