summary refs log tree commit diff
path: root/rebol.rb
diff options
context:
space:
mode:
author carhaas <carhaas1@gmail.com> 2014-09-09 09:31:07 +0200
committer carhaas <carhaas1@gmail.com> 2014-09-09 09:31:07 +0200
commit 4dd862953695849412cef0d8d840266a0a8c86cf (patch)
tree c28951147b1386c7725e3afc31020a527f647eec /rebol.rb
parent 25fa34e5c4275cdd4a4ca735c12b4b22f7ed7c0e (diff)
add free917
Diffstat (limited to 'rebol.rb')
-rwxr-xr-x rebol.rb 126
1 files changed, 98 insertions, 28 deletions
diff --git a/rebol.rb b/rebol.rb
index 04ace1c..b814d89 100755
--- a/rebol.rb
+++ b/rebol.rb
@@ -1,37 +1,80 @@
#!/usr/bin/env ruby
-require 'zipf'
+require 'nlp_ruby'
require 'trollop'
require 'tempfile'
require 'memcached'
require 'digest'
require_relative './hopefear'
+require 'pty'
+require 'expect'
+# memcached has to be running
+$cache = Memcached.new('localhost:11211')
-def exec natural_language_string, reference_output, no_output=false
+def exec natural_language_string, reference_output, corpus, no_output=false
mrl = output = feedback = nil
# this may cause collisions, but there are not so many German words that
# could have different Umlauts at the same position, e.g. Häuser => H?user
key_prefix = Digest::SHA1.hexdigest(natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_'))
- begin
- mrl = $cache.get key_prefix+'__MRL'
- output = $cache.get key_prefix+'__OUTPUT'
- feedback = $cache.get key_prefix+'__FEEDBACK'
- rescue Memcached::NotFound
- mrl_cmd = "#{SMT_SEMPARSE} \"#{natural_language_string.gsub('"', ' ')}\""
- # beware: EVAL_PL sometimes hangs and can't be killed!
- mrl = spawn_with_timeout(mrl_cmd, TIMEOUT, ACCEPT_ZOMBIES).strip
- output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{EVAL_PL} 2>&1 | grep \"X =\"", TIMEOUT).strip.split('X = ')[1]
- feedback = output==reference_output
+ if corpus == 'geoquery'
begin
- $cache.set key_prefix+'__MRL', mrl
- $cache.set key_prefix+'__OUTPUT', output
- $cache.set key_prefix+'__FEEDBACK', feedback
- rescue SystemExit, Interrupt
- $cache.delete key_prefix+'__MRL'
- $cache.delete key_prefix+'__OUTPUT'
- $cache.delete key_prefix+'__FEEDBACK"'
+ mrl = $cache.get key_prefix+'__MRL'
+ output = $cache.get key_prefix+'__OUTPUT'
+ feedback = $cache.get key_prefix+'__FEEDBACK'
+ rescue Memcached::NotFound
+ mrl_cmd = "#{SMT_SEMPARSE} \"#{natural_language_string}\""
+ # beware: EVAL_PL sometimes hangs and can't be killed!
+ mrl = spawn_with_timeout(mrl_cmd, TIMEOUT, ACCEPT_ZOMBIES).strip
+ output = spawn_with_timeout("echo \"execute_funql_query(#{mrl}, X).\" | swipl -s #{ EVAL_PL} 2>&1 | grep \"X =\"", TIMEOUT).strip.split('X = ')[1]
+ feedback = output==reference_output
+ begin
+ $cache.set key_prefix+'__MRL', mrl
+ $cache.set key_prefix+'__OUTPUT', output
+ $cache.set key_prefix+'__FEEDBACK', feedback
+ rescue SystemExit, Interrupt
+ $cache.delete key_prefix+'__MRL'
+ $cache.delete key_prefix+'__OUTPUT'
+ $cache.delete key_prefix+'__FEEDBACK"'
+ end
+ end
+ elsif corpus == 'free917'
+ begin
+ mrl = $cache.get key_prefix+'__MRL'
+ output = $cache.get key_prefix+'__OUTPUT'
+ feedback = $cache.get key_prefix+'__FEEDBACK'
+ rescue Memcached::NotFound
+ mrl = "not available"#the parser for freebase doesn't give a mrl, just the answer
+ output = ""
+ #STDERR.write "#{natural_language_string}\n"
+ @in.printf("#{natural_language_string}\n")
+ result = @out.expect(/^> /,TIMEOUT)
+ if result!=nil
+ result[0].delete!("\r\n")
+ result[0].delete!("\n")
+ result[0].delete!("\r")
+ matchData = result[0].match(/Top value { (.*) }>/)
+ if matchData!=nil
+ save = matchData[1].gsub(/^ */,"")
+ save = save.gsub(/ *$/,"")
+ save = save.gsub(/ +/," ")
+ output = save
+ #STDERR.write output
+ end
+ end
+ feedback = output==reference_output
+ begin
+ $cache.set key_prefix+'__MRL', mrl
+ $cache.set key_prefix+'__OUTPUT', output
+ $cache.set key_prefix+'__FEEDBACK', feedback
+ rescue SystemExit, Interrupt
+ $cache.delete key_prefix+'__MRL'
+ $cache.delete key_prefix+'__OUTPUT'
+ $cache.delete key_prefix+'__FEEDBACK"'
+ end
end
+
+
end
STDERR.write " nrl: #{natural_language_string}\n" if !no_output
STDERR.write " mrl: #{mrl}\n" if !no_output
@@ -84,6 +127,7 @@ def main
opt :init_weights, "initial weights", :type => :string, :required => true, :short => '-w'
opt :global_vars, "semantic parser, cdec bin, eval.pl", :type => :string, :required => true, :short => '-b'
opt :cdec_ini, "cdec config file", :type => :string, :required => true, :short => '-c'
+ opt :model, "parser model", :type => :int, :default => 0, :short => '-z'
# just used for 1best/hope variant detection
opt :stopwords_file, "stopwords file", :type => :string, :default => 'd/stopwords.en', :short => '-t'
# [output]
@@ -103,14 +147,14 @@ def main
opt :hope_fear_max, "# entries to consider when searching good hope/fear", :type => :int, :default => 10**10, :short => '-q'
# see hopefear.rb:
opt :variant, "rampion, rebol, rebol_light, exec", :type => :string, :default => 'rampion', :short => '-v'
+ opt :corpus, "corpus: either geoquery or free917", :type => :string, :required => true, :short => '-u'
end
require_relative cfg[:global_vars]
STDERR.write "CONFIGURATION\n"
cfg.each_pair { |k,v| STDERR.write " #{k}=#{v}\n" }
- STDERR.write "SMT_SEMPARSE=#{SMT_SEMPARSE}\n"
- STDERR.write "EVAL_PL=#{EVAL_PL}\n"
- STDERR.write "CDEC_BIN=#{CDEC_BIN}\n\n"
+ STDERR.write "CDEC_BIN=#{CDEC_BIN}\n"
+
# read data
input = ReadFile.readlines_strip cfg[:input]
@@ -118,6 +162,31 @@ def main
gold = ReadFile.readlines_strip cfg[:gold]
gold_mrl = ReadFile.readlines_strip cfg[:gold_mrl]
stopwords = ReadFile.readlines_strip cfg[:stopwords_file]
+ corpus = ""
+ case cfg[:corpus]
+ when 'geoquery'
+ corpus = 'geoquery'
+ STDERR.write "SMT_SEMPARSE=#{SMT_SEMPARSE}\n"
+ STDERR.write "EVAL_PL=#{EVAL_PL}\n"
+ when 'free917'
+ corpus = 'free917'
+ STDERR.write "SEMPRE=#{SEMPRE}\n"
+ if cfg[:model] == 0
+ STDERR.write "For Free917 please specify a model number.\n"
+ exit 1
+ end
+ original_dir = Dir.pwd
+ Dir.chdir "#{SEMPRE}"
+ @out, @in, @pid = PTY.spawn("./sempre @mode=interact @domain=free917 @sparqlserver=localhost:3093 @cacheserver=local @load=#{cfg[:model]} @executeTopOnly=0")
+ @out.expect(/> /,timeout=300)[0]
+ @in.printf("at what institutions was marshall hall a professor\n")#to initialize model
+ result = @out.expect(/> /,timeout=300)
+ Dir.chdir original_dir
+ else
+ STDERR.write "NO SUCH CORPUS, exiting.\n"
+ exit 1
+ end
+ STDERR.write "Corpus: #{corpus}\n"
own_references = nil
own_references = references.map{ |i| nil }
@@ -172,6 +241,7 @@ def main
if kbest.size == 0
without_translation += 1
STDERR.write "NO MT OUTPUT, skipping example\n"
+ #STDERR.write "#{CDEC_BIN} #{i} #{cfg[:cdec_ini]} #{tmp_file_path} #{cfg[:k]}"
next
end
@@ -202,7 +272,7 @@ def main
puts "#{kbest[0].s}" if iter+1==cfg[:iterate]
# execute 1best
- feedback, mrl, output = exec kbest[0].s, gold[j]
+ feedback, mrl, output = exec kbest[0].s, gold[j], corpus
STDERR.write " SCORES: #{kbest[0].scores.to_s}\n"
top1_stats.update feedback, mrl, output
@@ -213,11 +283,11 @@ def main
when 'rampion'
hope, fear, skip, type1, type2 = gethopefear_rampion kbest, references[j]
when 'rebol'
- hope, fear, skip, type1, type2, new_reference = gethopefear_rebol kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j]
+ hope, fear, skip, type1, type2, new_reference = gethopefear_rebol kbest, feedback, gold[j], cfg[:hope_fear_max], corpus, own_references[j]
when 'rebol_light'
- hope, fear, skip, type1, type2 = gethopefear_rebol_light kbest, feedback, gold[j]
+ hope, fear, skip, type1, type2 = gethopefear_rebol_light kbest, feedback, gold[j], corpus
when 'only_exec'
- hope, fear, skip, type1, type2, new_reference = gethopefear_exec kbest, feedback, gold[j], cfg[:hope_fear_max], own_references[j]
+ hope, fear, skip, type1, type2, new_reference = gethopefear_exec kbest, feedback, gold[j], cfg[:hope_fear_max], corpus, own_references[j]
else
STDERR.write "NO SUCH VARIANT, exiting.\n"
exit 1
@@ -243,7 +313,7 @@ def main
# hope output & statistics
STDERR.write "\n [HOPE]\n"
if hope
- feedback, mrl, output = exec hope.s, gold[j]
+ feedback, mrl, output = exec hope.s, gold[j], corpus
STDERR.write " SCORES: #{hope.scores.to_s}, ##{hope.rank}\n"
hope_stats.update feedback, mrl, output
if hope.s==references[j]
@@ -257,7 +327,7 @@ def main
# fear output & statistics
STDERR.write "\n [FEAR]\n"
if fear
- feedback, mrl, output = exec fear.s, gold[j]
+ feedback, mrl, output = exec fear.s, gold[j], corpus
STDERR.write " SCORES: #{fear.scores.to_s}, ##{fear.rank}\n"
fear_stats.update feedback, mrl, output
end