From d637e517919852217705a34d1355020a97af2f6a Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 11 Apr 2014 11:55:24 +0200
Subject: hashing, helper scripts

---
 hopefear.rb                            |  1 +
 lampion.rb                             |  3 ++-
 scripts/geoquery/sigf_format.rb        | 20 ++++++++++++++++++++
 scripts/geoquery/test-nof-old-crawl.sh |  5 +++++
 4 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100755 scripts/geoquery/sigf_format.rb
 create mode 100755 scripts/geoquery/test-nof-old-crawl.sh

diff --git a/hopefear.rb b/hopefear.rb
index ef1fd43..49ef593 100644
--- a/hopefear.rb
+++ b/hopefear.rb
@@ -115,6 +115,7 @@ def gethopefear_fear_no_exec_hope_exec_skip kbest, feedback, gold, max
   return hope, fear, skip, type1, type2
 end
 
+# new variant w/ "real" reference
 def gethopefear_only_exec kbest, feedback, gold, max, own_reference=nil
   hope = fear = nil; hope_idx = 0; new_reference = nil
   type1 = type2 = false
diff --git a/lampion.rb b/lampion.rb
index ab8861f..ab527d0 100755
--- a/lampion.rb
+++ b/lampion.rb
@@ -4,6 +4,7 @@ require 'nlp_ruby'
 require 'trollop'
 require 'tempfile'
 require 'memcached'
+require 'digest'
 
 require_relative './hopefear'
 
@@ -11,7 +12,7 @@ def exec natural_language_string, reference_output, no_output=false
   mrl = output = feedback = nil
   # this may cause collisions, but there are not so many German words that
   # could have different Umlauts at the same position, e.g. Häuser => H?user
-  key_prefix = natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_')
+  key_prefix = Digest::SHA1.hexdigest(natural_language_string.encode('ASCII', :invalid => :replace, :undef => :replace, :replace => '?').gsub(/ /,'_'))
   begin
     mrl = $cache.get key_prefix+'__MRL'
     output = $cache.get key_prefix+'__OUTPUT'
diff --git a/scripts/geoquery/sigf_format.rb b/scripts/geoquery/sigf_format.rb
new file mode 100755
index 0000000..9d0125b
--- /dev/null
+++ b/scripts/geoquery/sigf_format.rb
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+
+require 'nlp_ruby'
+
+
+gold = ReadFile.readlines_strip ARGV[0]
+i = -1
+while line = STDIN.gets
+  i += 1
+  line.strip!
+  a = [0, 0, 1]
+  if line==gold[i]
+    a[0] = 1
+    a[1] = 1
+  elsif line!=''
+    a[1] = 1
+  end
+  puts a.join " "
+end
+
diff --git a/scripts/geoquery/test-nof-old-crawl.sh b/scripts/geoquery/test-nof-old-crawl.sh
new file mode 100755
index 0000000..79c35a8
--- /dev/null
+++ b/scripts/geoquery/test-nof-old-crawl.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+/workspace/grounded/lampion/scripts/geoquery/translate.sh $1 $2 < /workspace/grounded/lampion/proper/d/split880.test-nof-old-crawl.in | tee $2.transl | /workspace/grounded/lampion/scripts/geoquery/semparse.rb $3 | tee $2.parsed | /workspace/grounded/lampion/scripts/geoquery/query.rb $3 > $2.output
+/workspace/grounded/lampion/scripts/geoquery/eval.rb /workspace/grounded/lampion/proper/d/split880.test-nof.gold < $2.output > $2.result
+
-- 
cgit v1.2.3