From 0023defd7a3c2ecde219f1364ed06668ec59186a Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 16 Feb 2014 00:05:48 +0100 Subject: proper --- .gitignore | 3 ++ hopefear.rb | 14 +++---- rampfion.rb | 126 ++++++++++++++++++++++++++---------------------------------- 3 files changed, 65 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index 5f28f5b..1272d07 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ prototype +proper +nlp_ruby +rampion_with_feedback.rb diff --git a/hopefear.rb b/hopefear.rb index 0423d26..918b71f 100644 --- a/hopefear.rb +++ b/hopefear.rb @@ -1,12 +1,12 @@ def hope_and_fear kbest, action max = -1.0/0 max_idx = -1 - kbest.each_with_index { |i,j| - if action=='hope' && i.score + i.other_score > max - max_idx = j; max = i.score + i.other_score + kbest.each_with_index { |k,i| + if action=='hope' && k.scores[:decoder] + k.scores[:psb] > max + max_idx = i; max = k.scores[:decoder] + k.scores[:psb] end - if action=='fear' && i.score - i.other_score > max - max_idx = j; max = i.score - i.other_score + if action=='fear' && k.scores[:decoder] - k.scores[:psb] > max + max_idx = i; max = k.scores[:decoder] - k.scores[:psb] end } return kbest[max_idx] @@ -36,7 +36,7 @@ def gethopefear_fear_no_exec kbest, feedback, gold, max hope = hope_and_fear(kbest, 'hope') type2 = true end - kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}.each_with_index { |k,i| + kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}.each_with_index { |k,i| break if i==max if !exec(k.s, gold, true)[0] fear = k @@ -65,7 +65,7 @@ end def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max hope = fear = nil; hope_idx = 0 type1 = type2 = false - sorted_kbest = kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)} + sorted_kbest = kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])} if feedback == true hope = kbest[0] type1 = true diff --git a/rampfion.rb b/rampfion.rb index 3ff216e..72680bb 100755 --- a/rampfion.rb +++ b/rampfion.rb @@ -7,40 +7,36 @@ require 'memcached' require_relative './hopefear' -SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset 2>/dev/null' +SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset' EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl' $cache = Memcached.new("localhost:11211") def exec natural_language_string, reference_output, no_output=false - func = nil - output = nil - feedback = nil + mrl = output = feedback = nil key_prefix = natural_language_string.encode("ASCII", :invalid => :replace, :undef => :replace, :replace => "?").gsub(/ /,'_') begin - func = $cache.get key_prefix+"__FUNC" - output = $cache.get key_prefix+"__OUTPUT" + mrl = $cache.get key_prefix+"__MRL" + output = $cache.get key_prefix+"__OUTPUT" feedback = $cache.get key_prefix+"__FEEDBACK" rescue Memcached::NotFound - #func = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\"").strip - func = `#{SMT_SEMPARSE} "#{natural_language_string}"`.strip - #output = spawn_with_timeout("echo \"execute_funql_query(#{func}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"").strip.split('X = ')[1] - output = `echo "execute_funql_query(#{func}, X)." | swipl -s #{EVAL_PL} 2>&1 | grep "X ="`.strip.split('X = ')[1] + mrl = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\" ", 60).strip + output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"", 60).strip.split('X = ')[1] feedback = output==reference_output begin - $cache.set key_prefix+"__FUNC", func + $cache.set key_prefix+"__MRL", mrl $cache.set key_prefix+"__OUTPUT", output $cache.set key_prefix+"__FEEDBACK", feedback rescue SystemExit, Interrupt - $cache.delete key_prefix+"__FUNC" + $cache.delete key_prefix+"__MRL" $cache.delete key_prefix+"__OUTPUT" $cache.delete key_prefix+"__FEEDBACK" end end STDERR.write " nrl: #{natural_language_string}\n" if !no_output - STDERR.write " mrl: #{func}\n" if !no_output + STDERR.write " mrl: #{mrl}\n" if !no_output STDERR.write " output: #{output}\n" if !no_output STDERR.write " correct?: #{feedback}\n" if !no_output - return feedback, func, output + return feedback, mrl, output end class Stats @@ -52,10 +48,9 @@ class Stats @with_correct_output = 0.0 end - # FIXME - def update feedback, func, output - @with_parse +=1 if func!='None'&&func!='' - @with_output +=1 if output!='null'&&output!='' + def update feedback, mrl, output + @with_parse += 1 if mrl!='' + @with_output += 1 if output!='' @with_correct_output += 1 if feedback==true end @@ -69,60 +64,52 @@ eos end end -# map model scores to lie within [0,1] def adjust_model_scores kbest, factor - min = kbest.map{ |k| k.score }.min - max = kbest.map{ |k| k.score }.max - kbest.each { |k| k.score = factor*((k.score-min)/(max-min)) } -end - -def update model, hope, fear, eta - diff = hope.f - fear.f - diff *= eta - model += diff - return model + min = kbest.map{ |k| k.scores[:decoder] }.min + max = kbest.map{ |k| k.scores[:decoder] }.max + kbest.each { |k| k.scores[:decoder] = factor*((k.scores[:decoder]-min)/(max-min)) } end def main cfg = Trollop::options do # data - opt :k, "k", :type => :int, :default => 10000, :short => '-k' - opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i' - opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r' - opt :gold, "gold output", :type => :string, :required => true, :short => '-g' - opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h' - opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w' - opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c' + opt :k, "k", :type => :int, :default => 100, :short => '-k' + opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i' + opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r' + opt :gold, "gold output", :type => :string, :required => true, :short => '-g' + opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h' + opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w' + opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c' + opt :stopwords_file, "stopwords file", :type => :string, :default => 'd/stopwords.en', :short => '-t' # output opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o' opt :debug, "debug output", :type => :bool, :default => false, :short => '-d' opt :print_kbest, "print full kbest lists", :type => :bool, :default => false, :short => '-l' # learning parameters - opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e' - opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j' - opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s' - opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m' - opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n' - opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x' - opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y' - opt :hope_fear_max, "FIXME", :type => :int, :default => 32, :short => '-q' + opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e' + opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j' + opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s' + opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m' + opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n' + opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x' + opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y' + opt :hope_fear_max, "# entries to consider when searching good hope/fear", :type => :int, :default => 10**10, :short => '-q' opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v' end STDERR.write "CONFIGURATION\n" cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" } - input = ReadFile.new(cfg[:input]).readlines_strip - references = ReadFile.new(cfg[:references]).readlines_strip - gold = ReadFile.new(cfg[:gold]).readlines_strip - gold_mrl = ReadFile.new(cfg[:gold_mrl]).readlines_strip # FIXME => prolog! - stopwords = ReadFile.new('prototype/d/stopwords.en').readlines_strip + input = ReadFile.readlines_strip cfg[:input] + references = ReadFile.readlines_strip cfg[:references] + gold = ReadFile.readlines_strip cfg[:gold] + gold_mrl = ReadFile.readlines_strip cfg[:gold_mrl] + stopwords = ReadFile.readlines_strip cfg[:stopwords_file] own_references = nil own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec' - w = SparseVector.new - w.from_kv_file cfg[:init_weights] + w = SparseVector.from_file cfg[:init_weights] last_weights_fn = '' cfg[:iterate].times { |iter| @@ -134,7 +121,6 @@ def main top1_stats = Stats.new 'top1' hope_stats = Stats.new 'hope' fear_stats = Stats.new 'fear' - refs_stats = Stats.new 'refs' type1_updates = 0 type2_updates = 0 top1_hit = 0 @@ -146,19 +132,21 @@ def main kbest_sz = 0 input.each_with_index { |i,j| + break if cfg[:stop_after]>0&&count==cfg[:stop_after] count += 1 tmp_file = Tempfile.new('rampion') tmp_file_path = tmp_file.path last_weights_fn = tmp_file.path - tmp_file.write w.to_kv ' ' + tmp_file.write w.to_kv ' ', "\n" tmp_file.close - kbest = CDEC::kbest i, cfg[:cdec_ini], tmp_file_path, cfg[:k] + kbest = cdec_kbest '/workspace/grounded/mt-system/cdec/decoder/cdec', i, cfg[:cdec_ini], tmp_file_path, cfg[:k] kbest_sz += kbest.size STDERR.write "\n=================\n" STDERR.write " EXAMPLE: #{j}\n" + STDERR.write " REFERENCE: #{references[j]}\n" STDERR.write " GOLD MRL: #{gold_mrl[j]}\n" STDERR.write "GOLD OUTPUT: #{gold[j]}\n" @@ -176,22 +164,22 @@ def main end end - kbest.each { |k| k.other_score = BLEU::per_sentence_bleu k.s, references[j] } + kbest.each { |k| k.scores[:psb] = BLEU::per_sentence_bleu k.s, references[j] } if cfg[:print_kbest] STDERR.write "\n<<< KBEST\n" - kbest.each_with_index { |k,l| STDERR.write k.to_s+"\n" } + kbest.each_with_index { |k,l| STDERR.write k.to_s2+"\n" } STDERR.write ">>>\n" end adjust_model_scores kbest, cfg[:scale_model] STDERR.write "\n [TOP1]\n" - STDERR.write "#{kbest[0].s}\n" puts "#{kbest[0].s}" if iter+1==cfg[:iterate] - feedback, func, output = exec kbest[0].s, gold[j] - top1_stats.update feedback, func, output + feedback, mrl, output = exec kbest[0].s, gold[j] + STDERR.write " SCORES: #{kbest[0].scores.to_s}\n" + top1_stats.update feedback, mrl, output hope = fear = new_reference = nil @@ -240,31 +228,28 @@ def main STDERR.write "\n [HOPE]\n" if hope - feedback, func, output = exec hope.s, gold[j] - hope_stats.update feedback, func, output + feedback, mrl, output = exec hope.s, gold[j] + STDERR.write " SCORES: #{hope.scores.to_s}, ##{hope.rank}\n" + hope_stats.update feedback, mrl, output end STDERR.write "\n [FEAR]\n" if fear - feedback, func, output = exec fear.s, gold[j] - fear_stats.update feedback, func, output + feedback, mrl, output = exec fear.s, gold[j] + STDERR.write " SCORES: #{fear.scores.to_s}, ##{fear.rank}\n" + fear_stats.update feedback, mrl, output end - STDERR.write "\n [REFERENCE]\n" - feedback, func, output = exec references[j], gold[j] - refs_stats.update feedback, func, output if skip || !hope || !fear STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n" next end - w = update w, hope, fear, cfg[:eta] if !cfg[:no_update] + w += (hope.f - fear.f) * cfg[:eta] if !cfg[:no_update] w.normalize! if cfg[:normalize] - - break if cfg[:stop_after]>0&&(j+1)==cfg[:stop_after] } if cfg[:iterate] > 1 - WriteFile.new("#{cfg[:output_weights]}.#{iter}.gz").write(ReadFile.new(last_weights_fn).read) + WriteFile.write ReadFile.read(last_weights_fn), "#{cfg[:output_weights]}.#{iter}.gz" else FileUtils::cp(last_weights_fn, cfg[:output_weights]) end @@ -288,7 +273,6 @@ def main #{top1_stats.to_s count} #{hope_stats.to_s count} #{fear_stats.to_s count} -#{refs_stats.to_s count} eos -- cgit v1.2.3