2 files changed, 74 insertions, 61 deletions
diff --git a/hopefear.rb b/hopefear.rb
index 918b71f..ef1fd43 100644
--- a/hopefear.rb
+++ b/hopefear.rb
@@ -19,10 +19,10 @@ def gethopefear_standard kbest, feedback
     hope = kbest[0]
     type1 = true
   else
-    hope = hope_and_fear(kbest, 'hope')
+    hope = hope_and_fear kbest, 'hope'
     type2 = true
   end
-  fear = hope_and_fear(kbest, 'fear')
+  fear = hope_and_fear kbest, 'fear'
   return hope, fear, false, type1, type2
 end
 
@@ -33,10 +33,12 @@ def gethopefear_fear_no_exec kbest, feedback, gold, max
     hope = kbest[0]
     type1 = true
   else
-    hope = hope_and_fear(kbest, 'hope')
+    hope = hope_and_fear kbest, 'hope'
     type2 = true
   end
-  kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}.each_with_index { |k,i|
+  # sorted in descending order by the sum decoder+psb, best ('hope') first
+  # select the 'best' translation that does not deliver the correct answer
+  kbest.sort{ |x,y| (y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb]) }.each_with_index { |k,i|
     break if i==max
     if !exec(k.s, gold, true)[0]
        fear = k
@@ -54,10 +56,11 @@ def gethopefear_fear_no_exec_skip kbest, feedback, gold
     hope = kbest[0]
     type1 = true
   else
-    hope = hope_and_fear(kbest, 'hope')
+    hope = hope_and_fear kbest, 'hope'
     type2 = true
   end
   fear = hope_and_fear(kbest, 'fear')
+  # skip example if fear gives the right answer
   skip = exec(fear.s, gold, true)[0]
   return hope, fear, skip, type1, type2
 end
@@ -65,11 +68,13 @@ end
 def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
   hope = fear = nil; hope_idx = 0
   type1 = type2 = false
-  sorted_kbest = kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}
+  # sorted in descending order by the sum decoder+psb, best ('hope') first
+  sorted_kbest = kbest.sort{ |x,y| (y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb]) }
   if feedback == true
     hope = kbest[0]
     type1 = true
   else
+    # select 'best' translation that correctly executes
     sorted_kbest.each_with_index { |k,i|
       next if i==0
       break if i==max
@@ -81,6 +86,7 @@ def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
     }
     type2 = true
   end
+  # select 'best' translation that does not correctly execute
   sorted_kbest.each_with_index { |k,i|
     break if i>(kbest.size-(hope_idx+1))||i==max
     if !exec(k.s, gold, true)[0]
@@ -88,6 +94,7 @@ def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
       break
     end
   }
+  # skip if hope or fear could not be found
   skip = true if !hope||!fear
   return hope, fear, skip, type1, type2
 end
@@ -99,15 +106,15 @@ def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max
     hope = kbest[0]
     type1 = true
   else
-    hope = hope_and_fear(kbest, 'hope')
+    hope = hope_and_fear kbest, 'hope'
     type2 = true
   end
-  fear = hope_and_fear(kbest, 'fear')
+  fear = hope_and_fear kbest, 'fear'
+  # skip if fear executes correctly or hope doesn't
   skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0]
   return hope, fear, skip, type1, type2
 end
 
-
 def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
   hope = fear = nil; hope_idx = 0; new_reference = nil
   type1 = type2 = false
@@ -119,6 +126,7 @@ def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
     hope = own_reference
     type1 = true
   else
+    # search for first (by decoder score) translation that gives the correct answer
     kbest.each_with_index { |k,i|
       next if i==0
       break if i==max
@@ -130,40 +138,7 @@ def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
     }
     type2 = true
   end
-  kbest.each_with_index { |k,i|
-    next if i==0||i==hope_idx
-    break if i==max
-    if !exec(k.s, gold, true)[0]
-      fear = k
-      break
-    end
-  }
-  skip = true if !hope||!fear
-  return hope, fear, skip, type1, type2, new_reference
-end
-
-def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil
-  hope = fear = nil; hope_idx = 0; new_reference = nil
-  type1 = type2 = false
-  if feedback == true
-    hope = kbest[0]
-    new_reference = hope
-    type1 = true
-  elsif own_reference
-    hope = own_reference
-    type1 = true
-  else
-    kbest.each_with_index { |k,i|
-      next if i==0
-      break if i==max
-      if exec(k.s, gold, true)[0]
-        hope_idx = i
-        hope = k
-        break
-      end
-    }
-    type2 = true
-  end
+  # search for first (by decoder score) translation that doesn't give the correct answer
   kbest.each_with_index { |k,i|
     next if i==0||i==hope_idx
     break if i==max
@@ -179,12 +154,14 @@ end
 def gethopefear_rampion kbest, reference
   hope = fear = nil
   type1 = type2 = false
+  # 1best is automatically hope if it matches reference
   if kbest[0].s == reference
     hope = kbest[0]
-    fear = hope_and_fear(kbest, 'fear')
+    fear = hope_and_fear kbest, 'fear'
     type1 = true
   else
-    hope = hope_and_fear(kbest, 'hope')
+    hope = hope_and_fear kbest, 'hope'
+    # 1best is automatically fear if it doesn't match reference 
     fear = kbest[0]
     type2 = true
   end
diff --git a/rampfion.rb b/rampfion.rb
index 04aec09..9ce1e9a 100755
--- a/rampfion.rb
+++ b/rampfion.rb
@@ -6,20 +6,27 @@ require 'tempfile'
 require 'memcached'
 require_relative './hopefear'
 
-
-# FIXME
+# edit here to change the parser
 SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/tgttosrc'
+
+# this is the 'fixed' version of eval.pl
 EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'
+
+# memcached has to be running
 $cache = Memcached.new('localhost:11211')
 
+
 def exec natural_language_string, reference_output, no_output=false
   mrl = output = feedback = nil
+  # this may cause collisions, but there are not so many German words that
+  # could have different Umlauts at the same position, e.g. Häuser => H?user
   key_prefix = natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_')
   begin
     mrl      = $cache.get key_prefix+'__MRL'
     output   = $cache.get key_prefix+'__OUTPUT'
     feedback = $cache.get key_prefix+'__FEEDBACK'
   rescue Memcached::NotFound
+    # beware: EVAL_PL sometimes hangs and can't be killed!
     mrl      = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\" ", 60).strip
     output   = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1  | grep \"X =\"", 60).strip.split('X = ')[1]
     feedback = output==reference_output
@@ -74,49 +81,58 @@ end
 
 def main
   cfg = Trollop::options do
-    # data
+    # [data]
     opt :k,              "k",                      :type => :int,    :default =>   100,            :short => '-k'
     opt :input,          "'foreign' input",        :type => :string, :required => true,            :short => '-i'
     opt :references,     "(parseable) references", :type => :string, :required => true,            :short => '-r'
     opt :gold,           "gold output",            :type => :string, :required => true,            :short => '-g'
+    # just for debugging:
     opt :gold_mrl,       "gold parse",             :type => :string, :required => true,            :short => '-h'
     opt :init_weights,   "initial weights",        :type => :string, :required => true,            :short => '-w'
     opt :cdec_ini,       "cdec config file",       :type => :string, :required => true,            :short => '-c'
+    # just used for 1best/hope variant detection
     opt :stopwords_file, "stopwords file",         :type => :string, :default => 'd/stopwords.en', :short => '-t'
-    # output
+    # [output]
     opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o'
     opt :debug,          "debug output",                  :type => :bool,   :default => false, :short => '-d'
     opt :print_kbest,    "print full kbest lists",        :type => :bool,   :default => false, :short => '-l'
-    # learning parameters
+    # [learning parameters]
     opt :eta,                    "learning rate",                                              :type => :float, :default => 0.01,   :short => '-e'
     opt :iterate,                "iteration X epochs",                                         :type => :int,   :default => 1,      :short => '-j'
     opt :stop_after,             "stop after x examples",                                      :type => :int,   :default => -1,     :short => '-s'
     opt :scale_model,            "scale model scores by this factor",                          :type => :float, :default => 1.0,    :short => '-m'
     opt :normalize,              "normalize weights after each update",                        :type => :bool,  :default => false,  :short => '-n'
+    # don't use when 'bad' examples are filtered:
     opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool,  :default => false,  :short => '-x'
     opt :no_update,              "don't update weights",                                       :type => :bool,  :default => false,  :short => '-y'
+    # don't use:
     opt :hope_fear_max,          "# entries to consider when searching good hope/fear",        :type => :int,   :default => 10**10, :short => '-q'
+    # see hopefear.rb:
     opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v'
   end
 
   STDERR.write "CONFIGURATION\n"
   cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }
 
+  # read data
   input      = ReadFile.readlines_strip cfg[:input]
   references = ReadFile.readlines_strip cfg[:references]
   gold       = ReadFile.readlines_strip cfg[:gold]
   gold_mrl   = ReadFile.readlines_strip cfg[:gold_mrl]
   stopwords  = ReadFile.readlines_strip cfg[:stopwords_file]
 
+  # only for 'only_exec' variant
   own_references = nil
   own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec'
 
+  # initialize model
   w = SparseVector.from_file cfg[:init_weights], ' '
   last_weights_fn = ''
 
+  # iterations loop
   cfg[:iterate].times { |iter|
 
-    # numerous counters
+    # (reset) numerous counters
     count                 = 0
     without_translation   = 0
     no_proper_gold_output = 0
@@ -133,16 +149,19 @@ def main
     hope_true_variant = 0
     kbest_sz          = 0
 
+    # input loop
     input.each_with_index { |i,j|
       break if cfg[:stop_after]>0&&count==cfg[:stop_after]
       count += 1
 
+      # write weights to file for cdec
       tmp_file        = Tempfile.new('rampion')
       tmp_file_path   = tmp_file.path
       last_weights_fn = tmp_file.path
       tmp_file.write w.to_kv ' ', "\n"
       tmp_file.close
 
+      # get kbest list
       kbest = cdec_kbest '/toolbox/cdec-dtrain/decoder/cdec', i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
       kbest_sz += kbest.size
 
@@ -152,13 +171,15 @@ def main
       STDERR.write "   GOLD MRL: #{gold_mrl[j]}\n"
       STDERR.write "GOLD OUTPUT: #{gold[j]}\n"
 
+      # translation failed
       if kbest.size == 0
         without_translation += 1
         STDERR.write "NO MT OUTPUT, skipping example\n"
         next
       end
 
-      if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].'
+      # don't use when data is filtered
+      if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].' || gold[j] == '[...].'
         no_proper_gold_output += 1
         if cfg[:skip_on_no_proper_gold]
           STDERR.write "NO PROPER GOLD OUTPUT, skipping example\n"
@@ -166,6 +187,7 @@ def main
         end
       end
 
+      # get per-sentence BLEU scores
       kbest.each { |k| k.scores[:psb] = BLEU::per_sentence_bleu k.s, references[j] }
 
       if cfg[:print_kbest]
@@ -174,16 +196,20 @@ def main
         STDERR.write ">>>\n"
       end
 
+      # map decoder scores to [0,1]
       adjust_model_scores kbest, cfg[:scale_model]
 
+      # informative output
       STDERR.write "\n [TOP1]\n"
+      # print 1best on last iteration
       puts "#{kbest[0].s}" if iter+1==cfg[:iterate]
 
+      # execute 1best
       feedback, mrl, output = exec kbest[0].s, gold[j]
       STDERR.write "     SCORES: #{kbest[0].scores.to_s}\n"
       top1_stats.update feedback, mrl, output
 
-
+      # hope/fear variants
       hope = fear = new_reference = nil
       type1 = type2 = skip = false
       case cfg[:variant]
@@ -206,6 +232,7 @@ def main
         exit 1
       end
 
+      # for 'only_exec' variant
       if new_reference
         own_references[j] = new_reference
       end
@@ -213,43 +240,52 @@ def main
       type1_updates+=1 if type1
       type2_updates+=1 if type2
 
+      # for string variant detection
       ref_words = bag_of_words references[j], stopwords
 
       if kbest[0].s == references[j]
         top1_hit += 1
-      else
+      elsif feedback
         top1_variant += 1
         top1_true_variant += 1 if !bag_of_words(kbest[0].s, stopwords).is_subset_of?(ref_words)
       end
-      if hope && hope.s==references[j]
-        hope_hit += 1
-      elsif hope
-        hope_variant += 1
-        hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words)
-      end
 
+      # hope output & statistics
       STDERR.write "\n [HOPE]\n"
       if hope
         feedback, mrl, output =  exec hope.s, gold[j]
         STDERR.write "     SCORES: #{hope.scores.to_s}, ##{hope.rank}\n"
         hope_stats.update feedback, mrl, output
+        if hope.s==references[j]
+          hope_hit += 1
+        elsif feedback
+          hope_variant += 1
+          hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words)
+        end
       end
+
+      # fear output & statistics
       STDERR.write "\n [FEAR]\n"
       if fear
         feedback, mrl, output = exec fear.s, gold[j]
         STDERR.write "     SCORES: #{fear.scores.to_s}, ##{fear.rank}\n"
-        fear_stats.update  feedback, mrl, output
+        fear_stats.update feedback, mrl, output
       end
 
+      # skip if needed
       if skip || !hope || !fear
         STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n"
         next
       end
 
+      # update
       w += (hope.f - fear.f) * cfg[:eta] if !cfg[:no_update]
+
+      # normalize model
       w.normalize! if cfg[:normalize]
     }
 
+    # save all weights
     if cfg[:iterate] > 1
       WriteFile.write ReadFile.read(last_weights_fn), "#{cfg[:output_weights]}.#{iter}.gz"
     else