From 4dd862953695849412cef0d8d840266a0a8c86cf Mon Sep 17 00:00:00 2001 From: carhaas Date: Tue, 9 Sep 2014 09:31:07 +0200 Subject: add free917 --- rebol.rb | 126 +++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 98 insertions(+), 28 deletions(-) (limited to 'rebol.rb') diff --git a/rebol.rb b/rebol.rb index 04ace1c..b814d89 100755 --- a/rebol.rb +++ b/rebol.rb @@ -1,37 +1,80 @@ #!/usr/bin/env ruby -require 'zipf' +require 'nlp_ruby' require 'trollop' require 'tempfile' require 'memcached' require 'digest' require_relative './hopefear' +require 'pty' +require 'expect' +# memcached has to be running +$cache = Memcached.new('localhost:11211') -def exec natural_language_string, reference_output, no_output=false +def exec natural_language_string, reference_output, corpus, no_output=false mrl = output = feedback = nil # this may cause collisions, but there are not so many German words that # could have different Umlauts at the same position, e.g. Häuser => H?user key_prefix = Digest::SHA1.hexdigest(natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_')) - begin - mrl = $cache.get key_prefix+'__MRL' - output = $cache.get key_prefix+'__OUTPUT' - feedback = $cache.get key_prefix+'__FEEDBACK' - rescue Memcached::NotFound - mrl_cmd = "#{SMT_SEMPARSE} \"#{natural_language_string.gsub('"', ' ')}\"" - # beware: EVAL_PL sometimes hangs and can't be killed! - mrl = spawn_with_timeout(mrl_cmd, TIMEOUT, ACCEPT_ZOMBIES).strip - output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"", TIMEOUT).strip.split('X = ')[1] - feedback = output==reference_output + if corpus == 'geoquery' begin - $cache.set key_prefix+'__MRL', mrl - $cache.set key_prefix+'__OUTPUT', output - $cache.set key_prefix+'__FEEDBACK', feedback - rescue SystemExit, Interrupt - $cache.delete key_prefix+'__MRL' - $cache.delete key_prefix+'__OUTPUT' - $cache.delete key_prefix+'__FEEDBACK"' + mrl = $cache.get key_prefix+'__MRL' + output = $cache.get key_prefix+'__OUTPUT' + feedback = $cache.get key_prefix+'__FEEDBACK' + rescue Memcached::NotFound + mrl_cmd = "#{SMT_SEMPARSE} \"#{natural_language_string}\"" + # beware: EVAL_PL sometimes hangs and can't be killed! + mrl = spawn_with_timeout(mrl_cmd, TIMEOUT, ACCEPT_ZOMBIES).strip + output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{ EVAL_PL} 2>&1 | grep \"X =\"", TIMEOUT).strip.split('X = ')[1] + feedback = output==reference_output + begin + $cache.set key_prefix+'__MRL', mrl + $cache.set key_prefix+'__OUTPUT', output + $cache.set key_prefix+'__FEEDBACK', feedback + rescue SystemExit, Interrupt + $cache.delete key_prefix+'__MRL' + $cache.delete key_prefix+'__OUTPUT' + $cache.delete key_prefix+'__FEEDBACK"' + end + end + elsif corpus == 'free917' + begin + mrl = $cache.get key_prefix+'__MRL' + output = $cache.get key_prefix+'__OUTPUT' + feedback = $cache.get key_prefix+'__FEEDBACK' + rescue Memcached::NotFound + mrl = "not available"#the parser for freebase doesn't give a mrl, just the answer + output = "" + #STDERR.write "#{natural_language_string}\n" + @in.printf("#{natural_language_string}\n") + result = @out.expect(/^> /,TIMEOUT) + if result!=nil + result[0].delete!("\r\n") + result[0].delete!("\n") + result[0].delete!("\r") + matchData = result[0].match(/Top value { (.*) }>/) + if matchData!=nil + save = matchData[1].gsub(/^ */,"") + save = save.gsub(/ *$/,"") + save = save.gsub(/ +/," ") + output = save + #STDERR.write output + end + end + feedback = output==reference_output + begin + $cache.set key_prefix+'__MRL', mrl + $cache.set key_prefix+'__OUTPUT', output + $cache.set key_prefix+'__FEEDBACK', feedback + rescue SystemExit, Interrupt + $cache.delete key_prefix+'__MRL' + $cache.delete key_prefix+'__OUTPUT' + $cache.delete key_prefix+'__FEEDBACK"' + end end + + end STDERR.write " nrl: #{natural_language_string}\n" if !no_output STDERR.write " mrl: #{mrl}\n" if !no_output @@ -84,6 +127,7 @@ def main opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w' opt :global_vars, "semantic parser, cdec bin, eval.pl", :type => :string, :required => true, :short => '-b' opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c' + opt :model, "parser model", :type => :int, :default => 0, :short => '-z' # just used for 1best/hope variant detection opt :stopwords_file, "stopwords file", :type => :string, :default => 'd/stopwords.en', :short => '-t' # [output] @@ -103,14 +147,14 @@ def main opt :hope_fear_max, "# entries to consider when searching good hope/fear", :type => :int, :default => 10**10, :short => '-q' # see hopefear.rb: opt :variant, "rampion, rebol, rebol_light, exec", :type => :string, :default => 'rampion', :short => '-v' + opt :corpus, "corpus: either geoquery or free917", :type => :string, :required => true, :short => '-u' end require_relative cfg[:global_vars] STDERR.write "CONFIGURATION\n" cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" } - STDERR.write "SMT_SEMPARSE=#{SMT_SEMPARSE}\n" - STDERR.write "EVAL_PL=#{EVAL_PL}\n" - STDERR.write "CDEC_BIN=#{CDEC_BIN}\n\n" + STDERR.write "CDEC_BIN=#{CDEC_BIN}\n" + # read data input = ReadFile.readlines_strip cfg[:input] @@ -118,6 +162,31 @@ def main gold = ReadFile.readlines_strip cfg[:gold] gold_mrl = ReadFile.readlines_strip cfg[:gold_mrl] stopwords = ReadFile.readlines_strip cfg[:stopwords_file] + corpus = "" + case cfg[:corpus] + when 'geoquery' + corpus = 'geoquery' + STDERR.write "SMT_SEMPARSE=#{SMT_SEMPARSE}\n" + STDERR.write "EVAL_PL=#{EVAL_PL}\n" + when 'free917' + corpus = 'free917' + STDERR.write "SEMPRE=#{SEMPRE}\n" + if cfg[:model] == 0 + STDERR.write "For Free917 please specify a model number.\n" + exit 1 + end + original_dir = Dir.pwd + Dir.chdir "#{SEMPRE}" + @out, @in, @pid = PTY.spawn("./sempre @mode=interact @domain=free917 @sparqlserver=localhost:3093 @cacheserver=local @load=#{cfg[:model]} @executeTopOnly=0") + @out.expect(/> /,timeout=300)[0] + @in.printf("at what institutions was marshall hall a professor\n")#to initialize model + result = @out.expect(/> /,timeout=300) + Dir.chdir original_dir + else + STDERR.write "NO SUCH CORPUS, exiting.\n" + exit 1 + end + STDERR.write "Corpus: #{corpus}\n" own_references = nil own_references = references.map{ |i| nil } @@ -172,6 +241,7 @@ def main if kbest.size == 0 without_translation += 1 STDERR.write "NO MT OUTPUT, skipping example\n" + #STDERR.write "#{CDEC_BIN} #{i} #{cfg[:cdec_ini]} #{tmp_file_path} #{cfg[:k]}" next end @@ -202,7 +272,7 @@ def main puts "#{kbest[0].s}" if iter+1==cfg[:iterate] # execute 1best - feedback, mrl, output = exec kbest[0].s, gold[j] + feedback, mrl, output = exec kbest[0].s, gold[j], corpus STDERR.write " SCORES: #{kbest[0].scores.to_s}\n" top1_stats.update feedback, mrl, output @@ -213,11 +283,11 @@ def main when 'rampion' hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j] when 'rebol' - hope, fear, skip, type1, type2, new_reference = gethopefear_rebol kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j] + hope, fear, skip, type1, type2, new_reference = gethopefear_rebol kbest, feedback, gold[j], cfg[:hope_fear_max], corpus, own_references[j] when 'rebol_light' - hope, fear, skip, type1, type2 = gethopefear_rebol_light kbest, feedback, gold[j] + hope, fear, skip, type1, type2 = gethopefear_rebol_light kbest, feedback, gold[j], corpus when 'only_exec' - hope, fear, skip, type1, type2, new_reference = gethopefear_exec kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j] + hope, fear, skip, type1, type2, new_reference = gethopefear_exec kbest, feedback, gold[j], cfg[:hope_fear_max], corpus, own_references[j] else STDERR.write "NO SUCH VARIANT, exiting.\n" exit 1 @@ -243,7 +313,7 @@ def main # hope output & statistics STDERR.write "\n [HOPE]\n" if hope - feedback, mrl, output = exec hope.s, gold[j] + feedback, mrl, output = exec hope.s, gold[j], corpus STDERR.write " SCORES: #{hope.scores.to_s}, ##{hope.rank}\n" hope_stats.update feedback, mrl, output if hope.s==references[j] @@ -257,7 +327,7 @@ def main # fear output & statistics STDERR.write "\n [FEAR]\n" if fear - feedback, mrl, output = exec fear.s, gold[j] + feedback, mrl, output = exec fear.s, gold[j], corpus STDERR.write " SCORES: #{fear.scores.to_s}, ##{fear.rank}\n" fear_stats.update feedback, mrl, output end -- cgit v1.2.3