summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-02-16 00:05:48 +0100
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2014-02-16 00:05:48 +0100
commit 0023defd7a3c2ecde219f1364ed06668ec59186a (patch)
tree 27b6e901aa1043e3ff85982ffec9a7986a28369d
parent cbafa90cb1a6b363b797c0f889c1c35749dee874 (diff)
proper
-rw-r--r-- .gitignore 3
-rw-r--r-- hopefear.rb 14
-rwxr-xr-x rampfion.rb 126
3 files changed, 65 insertions, 78 deletions
diff --git a/.gitignore b/.gitignore
index 5f28f5b..1272d07 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
prototype
+proper
+nlp_ruby
+rampion_with_feedback.rb
diff --git a/hopefear.rb b/hopefear.rb
index 0423d26..918b71f 100644
--- a/hopefear.rb
+++ b/hopefear.rb
@@ -1,12 +1,12 @@
def hope_and_fear kbest, action
max = -1.0/0
max_idx = -1
- kbest.each_with_index { |i,j|
- if action=='hope' && i.score + i.other_score > max
- max_idx = j; max = i.score + i.other_score
+ kbest.each_with_index { |k,i|
+ if action=='hope' && k.scores[:decoder] + k.scores[:psb] > max
+ max_idx = i; max = k.scores[:decoder] + k.scores[:psb]
end
- if action=='fear' && i.score - i.other_score > max
- max_idx = j; max = i.score - i.other_score
+ if action=='fear' && k.scores[:decoder] - k.scores[:psb] > max
+ max_idx = i; max = k.scores[:decoder] - k.scores[:psb]
end
}
return kbest[max_idx]
@@ -36,7 +36,7 @@ def gethopefear_fear_no_exec kbest, feedback, gold, max
hope = hope_and_fear(kbest, 'hope')
type2 = true
end
- kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}.each_with_index { |k,i|
+ kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}.each_with_index { |k,i|
break if i==max
if !exec(k.s, gold, true)[0]
fear = k
@@ -65,7 +65,7 @@ end
def gethopefear_fear_no_exec_hope_exec kbest, feedback, gold, max
hope = fear = nil; hope_idx = 0
type1 = type2 = false
- sorted_kbest = kbest.sort{|x,y|(y.score+y.other_score)<=>(x.score+x.other_score)}
+ sorted_kbest = kbest.sort{|x,y|(y.scores[:decoder]+y.scores[:psb])<=>(x.scores[:decoder]+x.scores[:psb])}
if feedback == true
hope = kbest[0]
type1 = true
diff --git a/rampfion.rb b/rampfion.rb
index 3ff216e..72680bb 100755
--- a/rampfion.rb
+++ b/rampfion.rb
@@ -7,40 +7,36 @@ require 'memcached'
require_relative './hopefear'
-SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset 2>/dev/null'
+SMT_SEMPARSE = 'python /workspace/grounded/smt-semparse-cp/decode_sentence.py /workspace/grounded/smt-semparse-cp/working/full_dataset'
EVAL_PL = '/workspace/grounded/wasp-1.0/data/geo-funql/eval/eval.pl'
$cache = Memcached.new("localhost:11211")
def exec natural_language_string, reference_output, no_output=false
- func = nil
- output = nil
- feedback = nil
+ mrl = output = feedback = nil
key_prefix = natural_language_string.encode("ASCII", :invalid => :replace, :undef => :replace, :replace => "?").gsub(/ /,'_')
begin
- func = $cache.get key_prefix+"__FUNC"
- output = $cache.get key_prefix+"__OUTPUT"
+ mrl = $cache.get key_prefix+"__MRL"
+ output = $cache.get key_prefix+"__OUTPUT"
feedback = $cache.get key_prefix+"__FEEDBACK"
rescue Memcached::NotFound
- #func = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\"").strip
- func = `#{SMT_SEMPARSE} "#{natural_language_string}"`.strip
- #output = spawn_with_timeout("echo \"execute_funql_query(#{func}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"").strip.split('X = ')[1]
- output = `echo "execute_funql_query(#{func}, X)." | swipl -s #{EVAL_PL} 2>&1 | grep "X ="`.strip.split('X = ')[1]
+ mrl = spawn_with_timeout("#{SMT_SEMPARSE} \"#{natural_language_string}\" ", 60).strip
+ output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"", 60).strip.split('X = ')[1]
feedback = output==reference_output
begin
- $cache.set key_prefix+"__FUNC", func
+ $cache.set key_prefix+"__MRL", mrl
$cache.set key_prefix+"__OUTPUT", output
$cache.set key_prefix+"__FEEDBACK", feedback
rescue SystemExit, Interrupt
- $cache.delete key_prefix+"__FUNC"
+ $cache.delete key_prefix+"__MRL"
$cache.delete key_prefix+"__OUTPUT"
$cache.delete key_prefix+"__FEEDBACK"
end
end
STDERR.write " nrl: #{natural_language_string}\n" if !no_output
- STDERR.write " mrl: #{func}\n" if !no_output
+ STDERR.write " mrl: #{mrl}\n" if !no_output
STDERR.write " output: #{output}\n" if !no_output
STDERR.write " correct?: #{feedback}\n" if !no_output
- return feedback, func, output
+ return feedback, mrl, output
end
class Stats
@@ -52,10 +48,9 @@ class Stats
@with_correct_output = 0.0
end
- # FIXME
- def update feedback, func, output
- @with_parse +=1 if func!='None'&&func!=''
- @with_output +=1 if output!='null'&&output!=''
+ def update feedback, mrl, output
+ @with_parse += 1 if mrl!=''
+ @with_output += 1 if output!=''
@with_correct_output += 1 if feedback==true
end
@@ -69,60 +64,52 @@ eos
end
end
-# map model scores to lie within [0,1]
def adjust_model_scores kbest, factor
- min = kbest.map{ |k| k.score }.min
- max = kbest.map{ |k| k.score }.max
- kbest.each { |k| k.score = factor*((k.score-min)/(max-min)) }
-end
-
-def update model, hope, fear, eta
- diff = hope.f - fear.f
- diff *= eta
- model += diff
- return model
+ min = kbest.map{ |k| k.scores[:decoder] }.min
+ max = kbest.map{ |k| k.scores[:decoder] }.max
+ kbest.each { |k| k.scores[:decoder] = factor*((k.scores[:decoder]-min)/(max-min)) }
end
def main
cfg = Trollop::options do
# data
- opt :k, "k", :type => :int, :default => 10000, :short => '-k'
- opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i'
- opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r'
- opt :gold, "gold output", :type => :string, :required => true, :short => '-g'
- opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h'
- opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w'
- opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c'
+ opt :k, "k", :type => :int, :default => 100, :short => '-k'
+ opt :input, "'foreign' input", :type => :string, :required => true, :short => '-i'
+ opt :references, "(parseable) references", :type => :string, :required => true, :short => '-r'
+ opt :gold, "gold output", :type => :string, :required => true, :short => '-g'
+ opt :gold_mrl, "gold parse", :type => :string, :required => true, :short => '-h'
+ opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w'
+ opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c'
+ opt :stopwords_file, "stopwords file", :type => :string, :default => 'd/stopwords.en', :short => '-t'
# output
opt :output_weights, "output file for final weights", :type => :string, :required => true, :short => '-o'
opt :debug, "debug output", :type => :bool, :default => false, :short => '-d'
opt :print_kbest, "print full kbest lists", :type => :bool, :default => false, :short => '-l'
# learning parameters
- opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e'
- opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j'
- opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s'
- opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m'
- opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n'
- opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x'
- opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y'
- opt :hope_fear_max, "FIXME", :type => :int, :default => 32, :short => '-q'
+ opt :eta, "learning rate", :type => :float, :default => 0.01, :short => '-e'
+ opt :iterate, "iteration X epochs", :type => :int, :default => 1, :short => '-j'
+ opt :stop_after, "stop after x examples", :type => :int, :default => -1, :short => '-s'
+ opt :scale_model, "scale model scores by this factor", :type => :float, :default => 1.0, :short => '-m'
+ opt :normalize, "normalize weights after each update", :type => :bool, :default => false, :short => '-n'
+ opt :skip_on_no_proper_gold, "skip, if the reference didn't produce a proper gold output", :type => :bool, :default => false, :short => '-x'
+ opt :no_update, "don't update weights", :type => :bool, :default => false, :short => '-y'
+ opt :hope_fear_max, "# entries to consider when searching good hope/fear", :type => :int, :default => 10**10, :short => '-q'
opt :variant, "standard, rampion, fear_no_exec, fear_no_exec_skip, fear_no_exec_hope_exec, fear_no_exec_hope_exec_skip, only_exec", :default => 'standard', :short => '-v'
end
STDERR.write "CONFIGURATION\n"
cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }
- input = ReadFile.new(cfg[:input]).readlines_strip
- references = ReadFile.new(cfg[:references]).readlines_strip
- gold = ReadFile.new(cfg[:gold]).readlines_strip
- gold_mrl = ReadFile.new(cfg[:gold_mrl]).readlines_strip # FIXME => prolog!
- stopwords = ReadFile.new('prototype/d/stopwords.en').readlines_strip
+ input = ReadFile.readlines_strip cfg[:input]
+ references = ReadFile.readlines_strip cfg[:references]
+ gold = ReadFile.readlines_strip cfg[:gold]
+ gold_mrl = ReadFile.readlines_strip cfg[:gold_mrl]
+ stopwords = ReadFile.readlines_strip cfg[:stopwords_file]
own_references = nil
own_references = references.map{ |i| nil } if cfg[:variant]=='only_exec'
- w = SparseVector.new
- w.from_kv_file cfg[:init_weights]
+ w = SparseVector.from_file cfg[:init_weights]
last_weights_fn = ''
cfg[:iterate].times { |iter|
@@ -134,7 +121,6 @@ def main
top1_stats = Stats.new 'top1'
hope_stats = Stats.new 'hope'
fear_stats = Stats.new 'fear'
- refs_stats = Stats.new 'refs'
type1_updates = 0
type2_updates = 0
top1_hit = 0
@@ -146,19 +132,21 @@ def main
kbest_sz = 0
input.each_with_index { |i,j|
+ break if cfg[:stop_after]>0&&count==cfg[:stop_after]
count += 1
tmp_file = Tempfile.new('rampion')
tmp_file_path = tmp_file.path
last_weights_fn = tmp_file.path
- tmp_file.write w.to_kv ' '
+ tmp_file.write w.to_kv ' ', "\n"
tmp_file.close
- kbest = CDEC::kbest i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
+ kbest = cdec_kbest '/workspace/grounded/mt-system/cdec/decoder/cdec', i, cfg[:cdec_ini], tmp_file_path, cfg[:k]
kbest_sz += kbest.size
STDERR.write "\n=================\n"
STDERR.write " EXAMPLE: #{j}\n"
+ STDERR.write " REFERENCE: #{references[j]}\n"
STDERR.write " GOLD MRL: #{gold_mrl[j]}\n"
STDERR.write "GOLD OUTPUT: #{gold[j]}\n"
@@ -176,22 +164,22 @@ def main
end
end
- kbest.each { |k| k.other_score = BLEU::per_sentence_bleu k.s, references[j] }
+ kbest.each { |k| k.scores[:psb] = BLEU::per_sentence_bleu k.s, references[j] }
if cfg[:print_kbest]
STDERR.write "\n<<< KBEST\n"
- kbest.each_with_index { |k,l| STDERR.write k.to_s+"\n" }
+ kbest.each_with_index { |k,l| STDERR.write k.to_s2+"\n" }
STDERR.write ">>>\n"
end
adjust_model_scores kbest, cfg[:scale_model]
STDERR.write "\n [TOP1]\n"
- STDERR.write "#{kbest[0].s}\n"
puts "#{kbest[0].s}" if iter+1==cfg[:iterate]
- feedback, func, output = exec kbest[0].s, gold[j]
- top1_stats.update feedback, func, output
+ feedback, mrl, output = exec kbest[0].s, gold[j]
+ STDERR.write " SCORES: #{kbest[0].scores.to_s}\n"
+ top1_stats.update feedback, mrl, output
hope = fear = new_reference = nil
@@ -240,31 +228,28 @@ def main
STDERR.write "\n [HOPE]\n"
if hope
- feedback, func, output = exec hope.s, gold[j]
- hope_stats.update feedback, func, output
+ feedback, mrl, output = exec hope.s, gold[j]
+ STDERR.write " SCORES: #{hope.scores.to_s}, ##{hope.rank}\n"
+ hope_stats.update feedback, mrl, output
end
STDERR.write "\n [FEAR]\n"
if fear
- feedback, func, output = exec fear.s, gold[j]
- fear_stats.update feedback, func, output
+ feedback, mrl, output = exec fear.s, gold[j]
+ STDERR.write " SCORES: #{fear.scores.to_s}, ##{fear.rank}\n"
+ fear_stats.update feedback, mrl, output
end
- STDERR.write "\n [REFERENCE]\n"
- feedback, func, output = exec references[j], gold[j]
- refs_stats.update feedback, func, output
if skip || !hope || !fear
STDERR.write "NO GOOD HOPE/FEAR, skipping example\n\n"
next
end
- w = update w, hope, fear, cfg[:eta] if !cfg[:no_update]
+ w += (hope.f - fear.f) * cfg[:eta] if !cfg[:no_update]
w.normalize! if cfg[:normalize]
-
- break if cfg[:stop_after]>0&&(j+1)==cfg[:stop_after]
}
if cfg[:iterate] > 1
- WriteFile.new("#{cfg[:output_weights]}.#{iter}.gz").write(ReadFile.new(last_weights_fn).read)
+ WriteFile.write ReadFile.read(last_weights_fn), "#{cfg[:output_weights]}.#{iter}.gz"
else
FileUtils::cp(last_weights_fn, cfg[:output_weights])
end
@@ -288,7 +273,6 @@ def main
#{top1_stats.to_s count}
#{hope_stats.to_s count}
#{fear_stats.to_s count}
-#{refs_stats.to_s count}
eos