summaryrefslogtreecommitdiff
path: root/data/geoquery/smt-semparse/functionalizer.py
diff options
context:
space:
mode:
authorPatrick Simianer <simianer@cl.uni-heidelberg.de>2014-04-14 16:05:40 +0200
committerPatrick Simianer <simianer@cl.uni-heidelberg.de>2014-04-14 16:05:40 +0200
commitb0bff4f48b2de88560199be09e5a29feecaa267c (patch)
tree448db9b6a6964223c2c5ec59a233885e01d22b74 /data/geoquery/smt-semparse/functionalizer.py
parent1bf772018b77e68137614a11add9f9f2f43ad344 (diff)
smt-semparse
Diffstat (limited to 'data/geoquery/smt-semparse/functionalizer.py')
-rw-r--r--data/geoquery/smt-semparse/functionalizer.py143
1 files changed, 143 insertions, 0 deletions
diff --git a/data/geoquery/smt-semparse/functionalizer.py b/data/geoquery/smt-semparse/functionalizer.py
new file mode 100644
index 0000000..782b4e5
--- /dev/null
+++ b/data/geoquery/smt-semparse/functionalizer.py
@@ -0,0 +1,143 @@
+import logging
+import util
+import sys
+
class Functionalizer:
    """Convert linearized MRL hypotheses from an n-best list to functional form.

    A linearized MRL is a whitespace-separated sequence of ``name<SEP>arity``
    tokens in prefix order (``<SEP>`` is ``util.ARITY_SEP``).
    ``functionalize`` rebuilds the nested ``name(arg,arg,...)`` string from
    it; ``run`` and ``run_sentence`` apply that to every hypothesis of an
    n-best file and keep the first hypothesis per sentence that is
    well-formed.
    """

    def __init__(self, config):
        """Store *config*; ``run`` reads ``config.experiment_dir`` from it."""
        self.config = config

    @staticmethod
    def _read_hypsets(lines):
        """Group n-best lines into one list of ``(hyp, score)`` pairs per id.

        Each non-blank line must have at least four ``|||``-separated fields:
        an integer id, the hypothesis, and two further fields that are joined
        back together (``field2 + ' ||| ' + field3.strip()``) as the score
        string.

        Grouping starts at id 0 and a new group begins whenever the id
        changes, so a file whose first id is not 0 yields an empty leading
        group.  An empty input yields ``[[]]``.
        NOTE(review): a group's position equals its id only when ids are
        contiguous from 0 -- confirm the decoder never skips a sentence.
        """
        hypsets = []
        hypset = []
        last_eid = 0
        for line in lines:
            if not line.strip():
                # Tolerate blank (e.g. trailing) lines instead of crashing
                # on int('').
                continue
            parts = line.split('|||')
            eid = int(parts[0])
            if eid != last_eid:
                hypsets.append(hypset)
                hypset = []
                last_eid = eid
            score = parts[2] + ' ||| ' + parts[3].strip()
            hypset.append((parts[1].strip(), score))
        hypsets.append(hypset)
        return hypsets

    def _first_functional(self, hypset):
        """Return ``(fun, score)`` for the first well-formed hypothesis.

        Hypotheses are tried in file order; returns ``None`` when none of
        them functionalizes to a non-empty string.
        """
        for hyp, score in hypset:
            fun = self.functionalize(hyp)
            if fun:
                return fun, score
        return None

    def run(self):
        """Functionalize ``hyp.mrl.nbest`` into ``hyp.fun``.

        Both files live in ``self.config.experiment_dir``.  For every
        sentence group one line ``<index> ||| <fun> ||| <score>`` is written
        for the first hypothesis that functionalizes; groups without any
        well-formed hypothesis produce no output line.
        """
        exp_dir = self.config.experiment_dir
        with open('%s/hyp.mrl.nbest' % exp_dir) as hyp_file:
            hypsets = self._read_hypsets(hyp_file)

        # ``with`` guarantees the output is flushed and closed; ``write``
        # emits the same bytes as the former Python-2-only ``print >>``.
        with open('%s/hyp.fun' % exp_dir, 'w') as fun_file:
            for counter, hypset in enumerate(hypsets):
                best = self._first_functional(hypset)
                if best is not None:
                    fun, score = best
                    fun_file.write('%s ||| %s ||| %s\n' % (counter, fun, score))

    def run_sentence(self, experiment_dir, temp_dir):
        """Return the first well-formed functional form in ``nbest.tmp``.

        Reads ``<temp_dir>/nbest.tmp`` and returns the functionalized form of
        the first hypothesis (in file order) that is well-formed, or ``""``
        when there is none.  *experiment_dir* is unused and kept only for
        interface compatibility.
        """
        with open('%s/nbest.tmp' % temp_dir, 'r') as hyp_file:
            hypsets = self._read_hypsets(hyp_file)

        for hypset in hypsets:
            best = self._first_functional(hypset)
            if best is not None:
                return best[0]
        return ""

    def functionalize(self, mrl):
        """Rebuild the nested functional form of a linearized MRL string.

        Each token is ``name<SEP>arity`` where the arity is either
        ``util.ARITY_STR`` (a quoted string leaf whose underscores become
        spaces), ``util.ARITY_ANY`` (a function taking all remaining
        arguments) or a non-negative integer.  Tokens without a separator are
        logged and treated as string leaves.

        Returns the functional string, or ``None`` when the token sequence is
        not a single complete tree, when an arity is malformed, or when one
        of the misplaced-underscore heuristics at the end fires.  An empty
        *mrl* yields ``''``.
        """
        stack = []   # pending argument counts of the currently open functions
        pieces = []  # output fragments, joined at the end
        tokens = mrl.split()
        last_index = len(tokens) - 1

        for index, item in enumerate(tokens):
            more_tokens = index < last_index

            if util.ARITY_SEP not in item:
                name = item
                arity = util.ARITY_STR
                logging.warning('unrecognized token: %s', item)
            else:
                # Split on the LAST separator only, so that names which
                # themselves contain the separator do not break unpacking.
                name, arity = item.rsplit(util.ARITY_SEP, 1)

            is_string = False
            if arity == util.ARITY_STR:
                arity = 0
                is_string = True
            elif arity != util.ARITY_ANY:
                try:
                    arity = int(arity)
                except ValueError:
                    logging.warning('malformed arity in token: %s', item)
                    return None
                if arity < 0:
                    logging.warning('negative arity in token: %s', item)
                    return None

            if arity == util.ARITY_ANY or arity > 0:
                # Open a new function; its arguments follow.
                pieces.append(name)
                pieces.append('(')
                stack.append(arity)
                continue

            # Leaf: emit it, then close every function it completes.
            if is_string:
                pieces.append("'%s'" % name.replace('_', ' '))
            else:
                pieces.append(name)

            while stack:
                top = stack.pop()
                if top == util.ARITY_ANY and more_tokens:
                    # Variadic function keeps consuming while input remains.
                    pieces.append(',')
                    stack.append(util.ARITY_ANY)
                    break
                elif top != util.ARITY_ANY and top > 1:
                    pieces.append(',')
                    stack.append(top - 1)
                    break
                else:
                    pieces.append(')')

            if not stack and more_tokens:
                # The tree is complete but input remains: not a single tree.
                return None

        if stack:
            # Input ended with functions still waiting for arguments.
            return None

        result = ''.join(pieces)

        # nasty hacks to fix misplaced _
        if '(_' in result:
            return None
        if ',_' in result and 'cityid' not in result:
            return None
        if '_),_)' in result:
            return None

        return result