diff options
-rw-r--r-- | hopefear.rb | 67 | ||||
-rwxr-xr-x | rampfion.rb | 68 |
2 files changed, 74 insertions, 61 deletions
diff --git a/hopefear.rb b/hopefear.rb index 918b71f..ef1fd43 100644 --- a/hopefear.rb +++ b/hopefear.rb @@ -19,10 +19,10 @@ def gethopefear_standard kbest, feedback hope = kbest[0] type1 = true else - hope = hope_and_fear(kbest, 'hope') + hope = hope_and_fear kbest, 'hope' type2 = true end - fear = hope_and_fear(kbest, 'fear') + fear = hope_and_fear kbest, 'fear' return hope, fear, false, type1, type2 end @@ -33,10 +33,12 @@ def gethopefear_fear_no_exec kbest, feedback, gold, max hope = kbest[0] type1 = true else - hope = hope_and_fear(kbest, 'hope') + hope = hope_and_fear kbest, 'hope' type2 = true end - kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}.each_with_index { |k,i| + # sorted in descending order by max(decoder, psb), best ('hope') first + # select the 'best' translation that does not deliver the correct answer + kbest.sort{ |x,y| (y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb]) }.each_with_index { |k,i| break if i==max if !exec(k.s, gold, true)[0] fear = k @@ -54,10 +56,11 @@ def gethopefear_fear_no_exec_skip kbest, feedback, gold hope = kbest[0] type1 = true else - hope = hope_and_fear(kbest, 'hope') + hope = hope_and_fear kbest, 'hope' type2 = true end fear = hope_and_fear(kbest, 'fear') + # skip example if fear gives the right answer skip = exec(fear.s, gold, true)[0] return hope, fear, skip, type1, type2 end @@ -65,11 +68,13 @@ end def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max hope = fear = nil; hope_idx = 0 type1 = type2 = false - sorted_kbest = kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])} + # sorted in descending order by max(decoder, psb), best ('hope') first + sorted_kbest = kbest.sort{ |x,y| (y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb]) } if feedback == true hope = kbest[0] type1 = true else + # select 'best' translation that correctly executes sorted_kbest.each_with_index { |k,i| next if i==0 break if i==max @@ -81,6 +86,7 @@ def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max } type2 = true end + # select 'best' translation that does not correctly execute sorted_kbest.each_with_index { |k,i| break if i>(kbest.size-(hope_idx+1))||i==max if !exec(k.s, gold, true)[0] @@ -88,6 +94,7 @@ def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max break end } + # skip if hope or fear could no be found skip = true if !hope||!fear return hope, fear, skip, type1, type2 end @@ -99,15 +106,15 @@ def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max hope = kbest[0] type1 = true else - hope = hope_and_fear(kbest, 'hope') + hope = hope_and_fear kbest, 'hope' type2 = true end - fear = hope_and_fear(kbest, 'fear') + fear = hope_and_fear kbest, 'fear' + # skip if fear executes correctly or hope doesn't skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0] return hope, fear, skip, type1, type2 end - def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil hope = fear = nil; hope_idx = 0; new_reference = nil type1 = type2 = false @@ -119,6 +126,7 @@ def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil hope = own_reference type1 = true else + # search for first (by decoder score) translation that gives the correct answer kbest.each_with_index { |k,i| next if i==0 break if i==max @@ -130,40 +138,7 @@ def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil } type2 = true end - kbest.each_with_index { |k,i| - next if i==0||i==hope_idx - break if i==max - if !exec(k.s, gold, true)[0] - fear = k - break - end - } - skip = true if !hope||!fear - return hope, fear, skip, type1, type2, new_reference -end - -def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil - hope = fear = nil; hope_idx = 0; new_reference = nil - type1 = type2 = false - if feedback == true - hope = kbest[0] - new_reference = hope - type1 = true - elsif own_reference - hope = own_reference - type1 = true - else - kbest.each_with_index { |k,i| - next if i==0 - break if i==max - if exec(k.s, gold, true)[0] - hope_idx = i - hope = k - break - end - } - type2 = true - end + # --"-- doesn't give the correct answer kbest.each_with_index { |k,i| next if i==0||i==hope_idx break if i==max @@ -179,12 +154,14 @@ end def gethopefear_rampion kbest, reference hope = fear = nil type1 = type2 = false + # 1best is automatically hope if it matches reference if kbest[0].s == reference hope = kbest[0] - fear = hope_and_fear(kbest, 'fear') + fear = hope_and_fear kbest, 'fear' type1 = true else - hope = hope_and_fear(kbest, 'hope') + hope = hope_and_fear kbest, 'hope' + # 1best is automatically fear if it doesn't match reference fear = kbest[0] type2 = true end diff --git a/rampfion.rb b/rampfion.rb index 04aec09..9ce1e9a 100755 --- a/rampfion.rb +++ b/rampfion.rb @@ -6,20 +6,27 @@ require 'tempfile' require 'memcached' require_relative './hopefear' - -# FIXME +# edit here to change the parser SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/tgttosrc' + +# this is the 'fixed' version of eval.pl EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl' + +# memcached hast to be running $cache = Memcached.new('localhost:11211') + def exec natural_language_string, reference_output, no_output=false mrl = output = feedback = nil + # this may cause collisions, but there are not so many German words that + # could have different Umlauts at the same position, e.g. Häuser => H?user key_prefix = natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_') begin mrl = $cache.get key_prefix+'__MRL' output = $cache.get key_prefix+'__OUTPUT' feedback = $cache.get key_prefix+'__FEEDBACK' rescue Memcached::NotFound + # beware: EVAL_PL sometimes hangs and can't be killed! mrl = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\" ", 60).strip output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"", 60).strip.split('X = ')[1] feedback = output==reference_output @@ -74,49 +81,58 @@ end def main cfg = Trollop::options do - # data + # [data] opt :k, "k", :type => :int, :default => 100, :short => '-k' opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i' opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r' opt :gold, "gold output", :type => :string, :required => true, :short => '-g' + # just for debugging: opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h' opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w' opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c' + # just used for 1best/hope variant detection opt :stopwords_file, "stopwords file", :type => :string, :default => 'd/stopwords.en', :short => '-t' - # output + # [output] opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o' opt :debug, "debug output", :type => :bool, :default => false, :short => '-d' opt :print_kbest, "print full kbest lists", :type => :bool, :default => false, :short => '-l' - # learning parameters + # [learning parameters] opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e' opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j' opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s' opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m' opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n' + # don't use when 'bad' examples are filtered: opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x' opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y' + # don't use: opt :hope_fear_max, "# entries to consider when searching good hope/fear", :type => :int, :default => 10**10, :short => '-q' + # see hopefear.rb: opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v' end STDERR.write "CONFIGURATION\n" cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" } + # read data input = ReadFile.readlines_strip cfg[:input] references = ReadFile.readlines_strip cfg[:references] gold = ReadFile.readlines_strip cfg[:gold] gold_mrl = ReadFile.readlines_strip cfg[:gold_mrl] stopwords = ReadFile.readlines_strip cfg[:stopwords_file] + # only for 'only_exec' variant own_references = nil own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec' + # initialize model w = SparseVector.from_file cfg[:init_weights], ' ' last_weights_fn = '' + # iterations loop cfg[:iterate].times { |iter| - # numerous counters + # (reset) numerous counters count = 0 without_translation = 0 no_proper_gold_output = 0 @@ -133,16 +149,19 @@ def main hope_true_variant = 0 kbest_sz = 0 + # input loop input.each_with_index { |i,j| break if cfg[:stop_after]>0&&count==cfg[:stop_after] count += 1 + # write weights to file for cdec tmp_file = Tempfile.new('rampion') tmp_file_path = tmp_file.path last_weights_fn = tmp_file.path tmp_file.write w.to_kv ' ', "\n" tmp_file.close + # get kbest list kbest = cdec_kbest '/toolbox/cdec-dtrain/decoder/cdec', i, cfg[:cdec_ini], tmp_file_path, cfg[:k] kbest_sz += kbest.size @@ -152,13 +171,15 @@ def main STDERR.write " GOLD MRL: #{gold_mrl[j]}\n" STDERR.write "GOLD OUTPUT: #{gold[j]}\n" + # translation failed if kbest.size == 0 without_translation += 1 STDERR.write "NO MT OUTPUT, skipping example\n" next end - if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].' + # don't use when data is filtered + if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].' || gold[j] == '[...].' no_proper_gold_output += 1 if cfg[:skip_on_no_proper_gold] STDERR.write "NO PROPER GOLD OUTPUT, skipping example\n" @@ -166,6 +187,7 @@ def main end end + # get per-sentence BLEU scores kbest.each { |k| k.scores[:psb] = BLEU::per_sentence_bleu k.s, references[j] } if cfg[:print_kbest] @@ -174,16 +196,20 @@ def main STDERR.write ">>>\n" end + # map decoder scores to [0,1] adjust_model_scores kbest, cfg[:scale_model] + # informative output STDERR.write "\n [TOP1]\n" + # print 1best on last iteration puts "#{kbest[0].s}" if iter+1==cfg[:iterate] + # execute 1best feedback, mrl, output = exec kbest[0].s, gold[j] STDERR.write " SCORES: #{kbest[0].scores.to_s}\n" top1_stats.update feedback, mrl, output - + # hope/fear variants hope = fear = new_reference = nil type1 = type2 = skip = false case cfg[:variant] @@ -206,6 +232,7 @@ def main exit 1 end + # for 'only_exec' variant if new_reference own_references[j] = new_reference end @@ -213,43 +240,52 @@ def main type1_updates+=1 if type1 type2_updates+=1 if type2 + # for string variant detection ref_words = bag_of_words references[j], stopwords if kbest[0].s == references[j] top1_hit += 1 - else + elsif feedback top1_variant += 1 top1_true_variant += 1 if !bag_of_words(kbest[0].s, stopwords).is_subset_of?(ref_words) end - if hope && hope.s==references[j] - hope_hit += 1 - elsif hope - hope_variant += 1 - hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words) - end + # hope output & statistics STDERR.write "\n [HOPE]\n" if hope feedback, mrl, output = exec hope.s, gold[j] STDERR.write " SCORES: #{hope.scores.to_s}, ##{hope.rank}\n" hope_stats.update feedback, mrl, output + if hope.s==references[j] + hope_hit += 1 + elsif feedback + hope_variant += 1 + hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words) + end end + + # fear output & statistics STDERR.write "\n [FEAR]\n" if fear feedback, mrl, output = exec fear.s, gold[j] STDERR.write " SCORES: #{fear.scores.to_s}, ##{fear.rank}\n" - fear_stats.update feedback, mrl, output + fear_stats.update feedback, mrl, output end + # skip if needed if skip || !hope || !fear STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n" next end + # update w += (hope.f - fear.f) * cfg[:eta] if !cfg[:no_update] + + # normalize model w.normalize! if cfg[:normalize] } + # save all weights if cfg[:iterate] > 1 WriteFile.write ReadFile.read(last_weights_fn), "#{cfg[:output_weights]}.#{iter}.gz" else |