proper

author: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-02-16 00:05:48 +0100
committer: Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-02-16 00:05:48 +0100
commit: 0023defd7a3c2ecde219f1364ed06668ec59186a (patch)
tree: 27b6e901aa1043e3ff85982ffec9a7986a28369d
parent: cbafa90cb1a6b363b797c0f889c1c35749dee874 (diff)
3 files changed, 65 insertions, 78 deletions
diff --git a/.gitignore b/.gitignore
index 5f28f5b..1272d07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
 prototype
+proper
+nlp_ruby
+rampion_with_feedback.rb
diff --git a/hopefear.rb b/hopefear.rb
index 0423d26..918b71f 100644
--- a/hopefear.rb
+++ b/hopefear.rb
@@ -1,12 +1,12 @@
 def hope_and_fear kbest, action
   max = -1.0/0
   max_idx = -1
-  kbest.each_with_index { |i,j|
-    if action=='hope' && i.score + i.other_score > max
-      max_idx = j; max = i.score + i.other_score
+  kbest.each_with_index { |k,i|
+    if action=='hope' && k.scores[:decoder] + k.scores[:psb] > max
+      max_idx = i; max = k.scores[:decoder] + k.scores[:psb]
     end
-    if action=='fear' && i.score - i.other_score > max
-      max_idx = j; max = i.score - i.other_score
+    if action=='fear' && k.scores[:decoder] - k.scores[:psb] > max
+      max_idx = i; max = k.scores[:decoder] - k.scores[:psb]
     end
   }
   return kbest[max_idx]
@@ -36,7 +36,7 @@ def gethopefear_fear_no_exec kbest, feedback, gold, max
     hope = hope_and_fear(kbest, 'hope')
     type2 = true
   end
-  kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}.each_with_index { |k,i|
+  kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}.each_with_index { |k,i|
     break if i==max
     if !exec(k.s, gold, true)[0]
        fear = k
@@ -65,7 +65,7 @@ end
 def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
   hope = fear = nil; hope_idx = 0
   type1 = type2 = false
-  sorted_kbest = kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}
+  sorted_kbest = kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}
   if feedback == true
     hope = kbest[0]
     type1 = true
diff --git a/rampfion.rb b/rampfion.rb
index 3ff216e..72680bb 100755
--- a/rampfion.rb
+++ b/rampfion.rb
@@ -7,40 +7,36 @@ require 'memcached'
 require_relative './hopefear'
 
 
-SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset 2>/dev/null'
+SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset'
 EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'
 $cache = Memcached.new("localhost:11211")
 
 def exec natural_language_string, reference_output, no_output=false
-  func = nil
-  output = nil
-  feedback = nil
+  mrl = output = feedback = nil
   key_prefix = natural_language_string.encode("ASCII", :invalid => :replace, :undef => :replace, :replace => "?").gsub(/ /,'_')
   begin
-    func = $cache.get key_prefix+"__FUNC"
-    output = $cache.get key_prefix+"__OUTPUT"
+    mrl      = $cache.get key_prefix+"__MRL"
+    output   = $cache.get key_prefix+"__OUTPUT"
     feedback = $cache.get key_prefix+"__FEEDBACK"
   rescue Memcached::NotFound
-    #func   = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\"").strip
-    func   = `#{SMT_SEMPARSE} "#{natural_language_string}"`.strip
-    #output = spawn_with_timeout("echo \"execute_funql_query(#{func}, X).\" | swipl -s #{EVAL_PL} 2>&1  | grep \"X =\"").strip.split('X = ')[1]
-    output = `echo "execute_funql_query(#{func}, X)." | swipl -s #{EVAL_PL} 2>&1  | grep "X ="`.strip.split('X = ')[1]
+    mrl      = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\" ", 60).strip
+    output   = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1  | grep \"X =\"", 60).strip.split('X = ')[1]
     feedback = output==reference_output
     begin
-      $cache.set key_prefix+"__FUNC", func
+      $cache.set key_prefix+"__MRL", mrl
       $cache.set key_prefix+"__OUTPUT", output
       $cache.set key_prefix+"__FEEDBACK", feedback
     rescue SystemExit, Interrupt
-      $cache.delete key_prefix+"__FUNC"
+      $cache.delete key_prefix+"__MRL"
       $cache.delete key_prefix+"__OUTPUT"
       $cache.delete key_prefix+"__FEEDBACK"
     end
   end
   STDERR.write "        nrl: #{natural_language_string}\n" if !no_output
-  STDERR.write "        mrl: #{func}\n" if !no_output
+  STDERR.write "        mrl: #{mrl}\n" if !no_output
   STDERR.write "     output: #{output}\n" if !no_output
   STDERR.write "   correct?: #{feedback}\n" if !no_output
-  return feedback, func, output
+  return feedback, mrl, output
 end
 
 class Stats
@@ -52,10 +48,9 @@ class Stats
     @with_correct_output = 0.0
   end
 
-  # FIXME
-  def update feedback, func, output
-    @with_parse +=1 if func!='None'&&func!=''
-    @with_output +=1 if output!='null'&&output!=''
+  def update feedback, mrl, output
+    @with_parse += 1 if mrl!=''
+    @with_output += 1 if output!=''
     @with_correct_output += 1 if feedback==true
   end
 
@@ -69,60 +64,52 @@ eos
   end
 end
 
-# map model scores to lie within [0,1]
 def adjust_model_scores kbest, factor
-  min = kbest.map{ |k| k.score }.min
-  max = kbest.map{ |k| k.score }.max
-  kbest.each { |k| k.score = factor*((k.score-min)/(max-min)) }
-end
-
-def update model, hope, fear, eta
-  diff = hope.f - fear.f
-  diff *= eta
-  model += diff
-  return model
+  min = kbest.map{ |k| k.scores[:decoder] }.min
+  max = kbest.map{ |k| k.scores[:decoder] }.max
+  kbest.each { |k| k.scores[:decoder] = factor*((k.scores[:decoder]-min)/(max-min)) }
 end
 
 def main
   cfg = Trollop::options do
     # data
-    opt :k,             "k",                      :type => :int,    :default => 10000, :short => '-k'
-    opt :input,         "'foreign' input",        :type => :string, :required => true, :short => '-i'
-    opt :references,    "(parseable) references", :type => :string, :required => true, :short => '-r'
-    opt :gold,          "gold output",            :type => :string, :required => true, :short => '-g'
-    opt :gold_mrl,      "gold parse",             :type => :string, :required => true, :short => '-h'
-    opt :init_weights,  "initial weights",        :type => :string, :required => true, :short => '-w'
-    opt :cdec_ini,      "cdec config file",       :type => :string, :required => true, :short => '-c'
+    opt :k,              "k",                      :type => :int,    :default =>   100,            :short => '-k'
+    opt :input,          "'foreign' input",        :type => :string, :required => true,            :short => '-i'
+    opt :references,     "(parseable) references", :type => :string, :required => true,            :short => '-r'
+    opt :gold,           "gold output",            :type => :string, :required => true,            :short => '-g'
+    opt :gold_mrl,       "gold parse",             :type => :string, :required => true,            :short => '-h'
+    opt :init_weights,   "initial weights",        :type => :string, :required => true,            :short => '-w'
+    opt :cdec_ini,       "cdec config file",       :type => :string, :required => true,            :short => '-c'
+    opt :stopwords_file, "stopwords file",         :type => :string, :default => 'd/stopwords.en', :short => '-t'
     # output
     opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o'
     opt :debug,          "debug output",                  :type => :bool,   :default => false, :short => '-d'
     opt :print_kbest,    "print full kbest lists",        :type => :bool,   :default => false, :short => '-l'
     # learning parameters
-    opt :eta,                    "learning rate",                                              :type => :float, :default => 0.01,  :short => '-e'
-    opt :iterate,                "iteration X epochs",                                         :type => :int,   :default => 1,     :short => '-j'
-    opt :stop_after,             "stop after x examples",                                      :type => :int,   :default => -1,    :short => '-s'
-    opt :scale_model,            "scale model scores by this factor",                          :type => :float, :default => 1.0,   :short => '-m'
-    opt :normalize,              "normalize weights after each update",                        :type => :bool,  :default => false, :short => '-n'
-    opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool,  :default => false, :short => '-x'
-    opt :no_update,              "don't update weights",                                       :type => :bool,  :default => false, :short => '-y'
-    opt :hope_fear_max,          "FIXME",                                                      :type => :int,   :default => 32,    :short => '-q'
+    opt :eta,                    "learning rate",                                              :type => :float, :default => 0.01,   :short => '-e'
+    opt :iterate,                "iteration X epochs",                                         :type => :int,   :default => 1,      :short => '-j'
+    opt :stop_after,             "stop after x examples",                                      :type => :int,   :default => -1,     :short => '-s'
+    opt :scale_model,            "scale model scores by this factor",                          :type => :float, :default => 1.0,    :short => '-m'
+    opt :normalize,              "normalize weights after each update",                        :type => :bool,  :default => false,  :short => '-n'
+    opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool,  :default => false,  :short => '-x'
+    opt :no_update,              "don't update weights",                                       :type => :bool,  :default => false,  :short => '-y'
+    opt :hope_fear_max,          "# entries to consider when searching good hope/fear",        :type => :int,   :default => 10**10, :short => '-q'
     opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v'
   end
 
   STDERR.write "CONFIGURATION\n"
   cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }
 
-  input      = ReadFile.new(cfg[:input]).readlines_strip
-  references = ReadFile.new(cfg[:references]).readlines_strip
-  gold       = ReadFile.new(cfg[:gold]).readlines_strip
-  gold_mrl   = ReadFile.new(cfg[:gold_mrl]).readlines_strip # FIXME => prolog!
-  stopwords  = ReadFile.new('prototype/d/stopwords.en').readlines_strip
+  input      = ReadFile.readlines_strip cfg[:input]
+  references = ReadFile.readlines_strip cfg[:references]
+  gold       = ReadFile.readlines_strip cfg[:gold]
+  gold_mrl   = ReadFile.readlines_strip cfg[:gold_mrl]
+  stopwords  = ReadFile.readlines_strip cfg[:stopwords_file]
 
   own_references = nil
   own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec'
 
-  w = SparseVector.new
-  w.from_kv_file cfg[:init_weights]
+  w = SparseVector.from_file cfg[:init_weights]
   last_weights_fn = ''
 
   cfg[:iterate].times { |iter|
@@ -134,7 +121,6 @@ def main
     top1_stats = Stats.new 'top1'
     hope_stats = Stats.new 'hope'
     fear_stats = Stats.new 'fear'
-    refs_stats = Stats.new 'refs'
     type1_updates     = 0
     type2_updates     = 0
     top1_hit          = 0
@@ -146,19 +132,21 @@ def main
     kbest_sz          = 0
 
     input.each_with_index { |i,j|
+      break if cfg[:stop_after]>0&&count==cfg[:stop_after]
       count += 1
 
       tmp_file        = Tempfile.new('rampion')
       tmp_file_path   = tmp_file.path
       last_weights_fn = tmp_file.path
-      tmp_file.write w.to_kv ' '
+      tmp_file.write w.to_kv ' ', "\n"
       tmp_file.close
 
-      kbest = CDEC::kbest i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
+      kbest = cdec_kbest '/workspace/grounded/mt-system/cdec/decoder/cdec', i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
       kbest_sz += kbest.size
 
       STDERR.write "\n=================\n"
       STDERR.write "    EXAMPLE: #{j}\n"
+      STDERR.write "  REFERENCE: #{references[j]}\n"
       STDERR.write "   GOLD MRL: #{gold_mrl[j]}\n"
       STDERR.write "GOLD OUTPUT: #{gold[j]}\n"
 
@@ -176,22 +164,22 @@ def main
         end
       end
 
-      kbest.each { |k| k.other_score = BLEU::per_sentence_bleu k.s, references[j] }
+      kbest.each { |k| k.scores[:psb] = BLEU::per_sentence_bleu k.s, references[j] }
 
       if cfg[:print_kbest]
         STDERR.write "\n<<< KBEST\n"
-        kbest.each_with_index { |k,l| STDERR.write k.to_s+"\n" }
+        kbest.each_with_index { |k,l| STDERR.write k.to_s2+"\n" }
         STDERR.write ">>>\n"
       end
 
       adjust_model_scores kbest, cfg[:scale_model]
 
       STDERR.write "\n [TOP1]\n"
-      STDERR.write "#{kbest[0].s}\n"
       puts "#{kbest[0].s}" if iter+1==cfg[:iterate]
 
-      feedback, func, output = exec kbest[0].s, gold[j]
-      top1_stats.update feedback, func, output
+      feedback, mrl, output = exec kbest[0].s, gold[j]
+      STDERR.write "     SCORES: #{kbest[0].scores.to_s}\n"
+      top1_stats.update feedback, mrl, output
 
 
       hope = fear = new_reference = nil
@@ -240,31 +228,28 @@ def main
 
       STDERR.write "\n [HOPE]\n"
       if hope
-        feedback, func, output =  exec hope.s, gold[j]
-        hope_stats.update feedback, func, output
+        feedback, mrl, output =  exec hope.s, gold[j]
+        STDERR.write "     SCORES: #{hope.scores.to_s}, ##{hope.rank}\n"
+        hope_stats.update feedback, mrl, output
       end
       STDERR.write "\n [FEAR]\n"
       if fear
-        feedback, func, output = exec fear.s, gold[j]
-        fear_stats.update  feedback, func, output
+        feedback, mrl, output = exec fear.s, gold[j]
+        STDERR.write "     SCORES: #{fear.scores.to_s}, ##{fear.rank}\n"
+        fear_stats.update  feedback, mrl, output
       end
-      STDERR.write "\n [REFERENCE]\n"
-      feedback, func, output = exec references[j], gold[j]
-      refs_stats.update feedback, func, output
 
       if skip || !hope || !fear
         STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n"
         next
       end
 
-      w = update w, hope, fear, cfg[:eta] if !cfg[:no_update]
+      w += (hope.f - fear.f) * cfg[:eta] if !cfg[:no_update]
       w.normalize! if cfg[:normalize]
-
-      break if cfg[:stop_after]>0&&(j+1)==cfg[:stop_after]
     }
 
     if cfg[:iterate] > 1
-      WriteFile.new("#{cfg[:output_weights]}.#{iter}.gz").write(ReadFile.new(last_weights_fn).read)
+      WriteFile.write ReadFile.read(last_weights_fn), "#{cfg[:output_weights]}.#{iter}.gz"
     else
       FileUtils::cp(last_weights_fn, cfg[:output_weights])
     end
@@ -288,7 +273,6 @@ def main
 #{top1_stats.to_s count}
 #{hope_stats.to_s count}
 #{fear_stats.to_s count}
-#{refs_stats.to_s count}
 
 eos
author	Patrick Simianer <simianer@cl.uni-heidelberg.de>	2014-02-16 00:05:48 +0100
committer	Patrick Simianer <simianer@cl.uni-heidelberg.de>	2014-02-16 00:05:48 +0100
commit	0023defd7a3c2ecde219f1364ed06668ec59186a (patch)
tree	27b6e901aa1043e3ff85982ffec9a7986a28369d
parent	cbafa90cb1a6b363b797c0f889c1c35749dee874 (diff)