summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--hopefear.rb67
-rwxr-xr-xrampfion.rb68
2 files changed, 74 insertions, 61 deletions
diff --git a/hopefear.rb b/hopefear.rb
index 918b71f..ef1fd43 100644
--- a/hopefear.rb
+++ b/hopefear.rb
@@ -19,10 +19,10 @@ def gethopefear_standard kbest, feedback
hope = kbest[0]
type1 = true
else
- hope = hope_and_fear(kbest, 'hope')
+ hope = hope_and_fear kbest, 'hope'
type2 = true
end
- fear = hope_and_fear(kbest, 'fear')
+ fear = hope_and_fear kbest, 'fear'
return hope, fear, false, type1, type2
end
@@ -33,10 +33,12 @@ def gethopefear_fear_no_exec kbest, feedback, gold, max
hope = kbest[0]
type1 = true
else
- hope = hope_and_fear(kbest, 'hope')
+ hope = hope_and_fear kbest, 'hope'
type2 = true
end
- kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}.each_with_index { |k,i|
+ # sorted in descending order by max(decoder, psb), best ('hope') first
+ # select the 'best' translation that does not deliver the correct answer
+ kbest.sort{ |x,y| (y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb]) }.each_with_index { |k,i|
break if i==max
if !exec(k.s, gold, true)[0]
fear = k
@@ -54,10 +56,11 @@ def gethopefear_fear_no_exec_skip kbest, feedback, gold
hope = kbest[0]
type1 = true
else
- hope = hope_and_fear(kbest, 'hope')
+ hope = hope_and_fear kbest, 'hope'
type2 = true
end
fear = hope_and_fear(kbest, 'fear')
+ # skip example if fear gives the right answer
skip = exec(fear.s, gold, true)[0]
return hope, fear, skip, type1, type2
end
@@ -65,11 +68,13 @@ end
def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
hope = fear = nil; hope_idx = 0
type1 = type2 = false
- sorted_kbest = kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}
+ # sorted in descending order by max(decoder, psb), best ('hope') first
+ sorted_kbest = kbest.sort{ |x,y| (y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb]) }
if feedback == true
hope = kbest[0]
type1 = true
else
+ # select 'best' translation that correctly executes
sorted_kbest.each_with_index { |k,i|
next if i==0
break if i==max
@@ -81,6 +86,7 @@ def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
}
type2 = true
end
+ # select 'best' translation that does not correctly execute
sorted_kbest.each_with_index { |k,i|
break if i>(kbest.size-(hope_idx+1))||i==max
if !exec(k.s, gold, true)[0]
@@ -88,6 +94,7 @@ def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
break
end
}
+ # skip if hope or fear could no be found
skip = true if !hope||!fear
return hope, fear, skip, type1, type2
end
@@ -99,15 +106,15 @@ def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max
hope = kbest[0]
type1 = true
else
- hope = hope_and_fear(kbest, 'hope')
+ hope = hope_and_fear kbest, 'hope'
type2 = true
end
- fear = hope_and_fear(kbest, 'fear')
+ fear = hope_and_fear kbest, 'fear'
+ # skip if fear executes correctly or hope doesn't
skip = exec(fear.s, gold, true)[0]||!exec(hope.s, gold, true)[0]
return hope, fear, skip, type1, type2
end
-
def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
hope = fear = nil; hope_idx = 0; new_reference = nil
type1 = type2 = false
@@ -119,6 +126,7 @@ def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
hope = own_reference
type1 = true
else
+ # search for first (by decoder score) translation that gives the correct answer
kbest.each_with_index { |k,i|
next if i==0
break if i==max
@@ -130,40 +138,7 @@ def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
}
type2 = true
end
- kbest.each_with_index { |k,i|
- next if i==0||i==hope_idx
- break if i==max
- if !exec(k.s, gold, true)[0]
- fear = k
- break
- end
- }
- skip = true if !hope||!fear
- return hope, fear, skip, type1, type2, new_reference
-end
-
-def gethopefear_only_exec_simple kbest, feedback, gold, max, own_reference=nil
- hope = fear = nil; hope_idx = 0; new_reference = nil
- type1 = type2 = false
- if feedback == true
- hope = kbest[0]
- new_reference = hope
- type1 = true
- elsif own_reference
- hope = own_reference
- type1 = true
- else
- kbest.each_with_index { |k,i|
- next if i==0
- break if i==max
- if exec(k.s, gold, true)[0]
- hope_idx = i
- hope = k
- break
- end
- }
- type2 = true
- end
+ # --"-- doesn't give the correct answer
kbest.each_with_index { |k,i|
next if i==0||i==hope_idx
break if i==max
@@ -179,12 +154,14 @@ end
def gethopefear_rampion kbest, reference
hope = fear = nil
type1 = type2 = false
+ # 1best is automatically hope if it matches reference
if kbest[0].s == reference
hope = kbest[0]
- fear = hope_and_fear(kbest, 'fear')
+ fear = hope_and_fear kbest, 'fear'
type1 = true
else
- hope = hope_and_fear(kbest, 'hope')
+ hope = hope_and_fear kbest, 'hope'
+ # 1best is automatically fear if it doesn't match reference
fear = kbest[0]
type2 = true
end
diff --git a/rampfion.rb b/rampfion.rb
index 04aec09..9ce1e9a 100755
--- a/rampfion.rb
+++ b/rampfion.rb
@@ -6,20 +6,27 @@ require 'tempfile'
require 'memcached'
require_relative './hopefear'
-
-# FIXME
+# edit here to change the parser
SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/tgttosrc'
+
+# this is the 'fixed' version of eval.pl
EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'
+
+# memcached hast to be running
$cache = Memcached.new('localhost:11211')
+
def exec natural_language_string, reference_output, no_output=false
mrl = output = feedback = nil
+ # this may cause collisions, but there are not so many German words that
+ # could have different Umlauts at the same position, e.g. Häuser => H?user
key_prefix = natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_')
begin
mrl = $cache.get key_prefix+'__MRL'
output = $cache.get key_prefix+'__OUTPUT'
feedback = $cache.get key_prefix+'__FEEDBACK'
rescue Memcached::NotFound
+ # beware: EVAL_PL sometimes hangs and can't be killed!
mrl = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\" ", 60).strip
output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"", 60).strip.split('X = ')[1]
feedback = output==reference_output
@@ -74,49 +81,58 @@ end
def main
cfg = Trollop::options do
- # data
+ # [data]
opt :k, "k", :type => :int, :default => 100, :short => '-k'
opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i'
opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r'
opt :gold, "gold output", :type => :string, :required => true, :short => '-g'
+ # just for debugging:
opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h'
opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w'
opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c'
+ # just used for 1best/hope variant detection
opt :stopwords_file, "stopwords file", :type => :string, :default => 'd/stopwords.en', :short => '-t'
- # output
+ # [output]
opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o'
opt :debug, "debug output", :type => :bool, :default => false, :short => '-d'
opt :print_kbest, "print full kbest lists", :type => :bool, :default => false, :short => '-l'
- # learning parameters
+ # [learning parameters]
opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e'
opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j'
opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s'
opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m'
opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n'
+ # don't use when 'bad' examples are filtered:
opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x'
opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y'
+ # don't use:
opt :hope_fear_max, "# entries to consider when searching good hope/fear", :type => :int, :default => 10**10, :short => '-q'
+ # see hopefear.rb:
opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v'
end
STDERR.write "CONFIGURATION\n"
cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }
+ # read data
input = ReadFile.readlines_strip cfg[:input]
references = ReadFile.readlines_strip cfg[:references]
gold = ReadFile.readlines_strip cfg[:gold]
gold_mrl = ReadFile.readlines_strip cfg[:gold_mrl]
stopwords = ReadFile.readlines_strip cfg[:stopwords_file]
+ # only for 'only_exec' variant
own_references = nil
own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec'
+ # initialize model
w = SparseVector.from_file cfg[:init_weights], ' '
last_weights_fn = ''
+ # iterations loop
cfg[:iterate].times { |iter|
- # numerous counters
+ # (reset) numerous counters
count = 0
without_translation = 0
no_proper_gold_output = 0
@@ -133,16 +149,19 @@ def main
hope_true_variant = 0
kbest_sz = 0
+ # input loop
input.each_with_index { |i,j|
break if cfg[:stop_after]>0&&count==cfg[:stop_after]
count += 1
+ # write weights to file for cdec
tmp_file = Tempfile.new('rampion')
tmp_file_path = tmp_file.path
last_weights_fn = tmp_file.path
tmp_file.write w.to_kv ' ', "\n"
tmp_file.close
+ # get kbest list
kbest = cdec_kbest '/toolbox/cdec-dtrain/decoder/cdec', i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
kbest_sz += kbest.size
@@ -152,13 +171,15 @@ def main
STDERR.write " GOLD MRL: #{gold_mrl[j]}\n"
STDERR.write "GOLD OUTPUT: #{gold[j]}\n"
+ # translation failed
if kbest.size == 0
without_translation += 1
STDERR.write "NO MT OUTPUT, skipping example\n"
next
end
- if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].'
+ # don't use when data is filtered
+ if gold[j] == '[]' || gold[j] == '[...]' || gold[j] == '[].' || gold[j] == '[...].'
no_proper_gold_output += 1
if cfg[:skip_on_no_proper_gold]
STDERR.write "NO PROPER GOLD OUTPUT, skipping example\n"
@@ -166,6 +187,7 @@ def main
end
end
+ # get per-sentence BLEU scores
kbest.each { |k| k.scores[:psb] = BLEU::per_sentence_bleu k.s, references[j] }
if cfg[:print_kbest]
@@ -174,16 +196,20 @@ def main
STDERR.write ">>>\n"
end
+ # map decoder scores to [0,1]
adjust_model_scores kbest, cfg[:scale_model]
+ # informative output
STDERR.write "\n [TOP1]\n"
+ # print 1best on last iteration
puts "#{kbest[0].s}" if iter+1==cfg[:iterate]
+ # execute 1best
feedback, mrl, output = exec kbest[0].s, gold[j]
STDERR.write " SCORES: #{kbest[0].scores.to_s}\n"
top1_stats.update feedback, mrl, output
-
+ # hope/fear variants
hope = fear = new_reference = nil
type1 = type2 = skip = false
case cfg[:variant]
@@ -206,6 +232,7 @@ def main
exit 1
end
+ # for 'only_exec' variant
if new_reference
own_references[j] = new_reference
end
@@ -213,43 +240,52 @@ def main
type1_updates+=1 if type1
type2_updates+=1 if type2
+ # for string variant detection
ref_words = bag_of_words references[j], stopwords
if kbest[0].s == references[j]
top1_hit += 1
- else
+ elsif feedback
top1_variant += 1
top1_true_variant += 1 if !bag_of_words(kbest[0].s, stopwords).is_subset_of?(ref_words)
end
- if hope && hope.s==references[j]
- hope_hit += 1
- elsif hope
- hope_variant += 1
- hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words)
- end
+ # hope output & statistics
STDERR.write "\n [HOPE]\n"
if hope
feedback, mrl, output = exec hope.s, gold[j]
STDERR.write " SCORES: #{hope.scores.to_s}, ##{hope.rank}\n"
hope_stats.update feedback, mrl, output
+ if hope.s==references[j]
+ hope_hit += 1
+ elsif feedback
+ hope_variant += 1
+ hope_true_variant += 1 if !bag_of_words(hope.s, stopwords).is_subset_of?(ref_words)
+ end
end
+
+ # fear output & statistics
STDERR.write "\n [FEAR]\n"
if fear
feedback, mrl, output = exec fear.s, gold[j]
STDERR.write " SCORES: #{fear.scores.to_s}, ##{fear.rank}\n"
- fear_stats.update feedback, mrl, output
+ fear_stats.update feedback, mrl, output
end
+ # skip if needed
if skip || !hope || !fear
STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n"
next
end
+ # update
w += (hope.f - fear.f) * cfg[:eta] if !cfg[:no_update]
+
+ # normalize model
w.normalize! if cfg[:normalize]
}
+ # save all weights
if cfg[:iterate] > 1
WriteFile.write ReadFile.read(last_weights_fn), "#{cfg[:output_weights]}.#{iter}.gz"
else