summaryrefslogtreecommitdiff
path: root/data/geoquery/smt-semparse/functionalizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'data/geoquery/smt-semparse/functionalizer.py')
-rw-r--r--data/geoquery/smt-semparse/functionalizer.py143
1 files changed, 0 insertions, 143 deletions
diff --git a/data/geoquery/smt-semparse/functionalizer.py b/data/geoquery/smt-semparse/functionalizer.py
deleted file mode 100644
index 782b4e5..0000000
--- a/data/geoquery/smt-semparse/functionalizer.py
+++ /dev/null
@@ -1,143 +0,0 @@
-import logging
-import util
-import sys
-
-class Functionalizer:
-
- def __init__(self, config):
- self.config = config
-
- def run(self):
- hyp_file = open('%s/hyp.mrl.nbest' % self.config.experiment_dir)
- fun_file = open('%s/hyp.fun' % self.config.experiment_dir, 'w')
-
- hypsets = []
- hypset = []
- last_eid = 0
- for line in hyp_file:
- parts = line.split('|||')
- eid = int(parts[0])
- if eid != last_eid:
- hypsets.append(hypset)
- hypset = []
- last_eid = eid
- score = parts[2] + ' ||| ' + parts[3].strip()
- hyp = parts[1].strip()
- hypset.append((hyp,score))
- hypsets.append(hypset)
-
- counter = 0
- for hypset in hypsets:
- hypset = list(reversed(hypset))
- while hypset:
- hyp, score = hypset.pop()
- fun = self.functionalize(hyp)
- if fun:
- print >>fun_file, counter, '|||', fun, '|||', score
- break
- counter += 1
-
- def run_sentence(self, experiment_dir, temp_dir):
- hyp_file = open('%s/nbest.tmp' % temp_dir, 'r')
-
- hypsets = []
- hypset = []
- last_eid = 0
- for line in hyp_file:
- parts = line.split('|||')
- eid = int(parts[0])
- if eid != last_eid:
- hypsets.append(hypset)
- hypset = []
- last_eid = eid
- score = parts[2] + ' ||| ' + parts[3].strip()
- hyp = parts[1].strip()
- hypset.append((hyp,score))
- hypsets.append(hypset)
- hyp_file.close()
-
- counter = 0
- for hypset in hypsets:
- hypset = list(reversed(hypset))
- while hypset:
- hyp, score = hypset.pop()
- fun = self.functionalize(hyp)
- if fun:
- return fun
- break
- counter += 1
- return ""
-
- #xc = 0
- def functionalize(self, mrl):
-
- #if '_@0' in mrl and 'cityid@2' in mrl:
- # #print '==='
- # #print mrl
- # self.xc += 1
- # if self.xc > 5:
- # exit()
-
- stack = []
- r = []
- tokens = list(reversed(mrl.split()))
-
- #print tokens
-
- while tokens:
- it = tokens.pop()
- #print it
- if util.ARITY_SEP not in it:
- token = it
- arity = util.ARITY_STR
- logging.warn('unrecognized token: %s', it)
- else:
- token, arity = it.rsplit(util.ARITY_SEP)
- if arity == util.ARITY_STR:
- arity = 0
- arity_str = True
- elif not (arity == util.ARITY_ANY):
- arity = int(arity)
- arity_str = False
-
- if arity == util.ARITY_ANY or arity > 0:
- r.append(token)
- r.append('(')
- stack.append(arity)
- else:
- assert arity == 0
- if arity_str:
- r.append("'%s'" % token.replace('_', ' '))
- else:
- r.append(token)
- #print r
- while stack:
- top = stack.pop()
- if top == util.ARITY_ANY and tokens:
- r.append(',')
- stack.append(util.ARITY_ANY)
- break
- elif top != util.ARITY_ANY and top > 1:
- r.append(',')
- stack.append(top - 1)
- break
- else:
- r.append(')')
-
- if not stack and tokens:
- return None
-
- if stack:
- return None
-
- r = ''.join(r)
-
- # nasty hacks to fix misplaced _
- if '(_' in r:
- return None
- if ',_' in r and not ('cityid' in r):
- return None
- if '_),_)' in r:
- return None
-
- return r