diff options
author | Jacob <andqso@gmail.com> | 2013-07-28 09:54:54 +0100 |
---|---|---|
committer | Jacob <andqso@gmail.com> | 2013-07-28 09:54:54 +0100 |
commit | f343459d6198352964dbb6779f15c352fe2d5794 (patch) | |
tree | 07d50c9a8269e3892ccea8f5680b3e2bac984fce /src/util.py |
init
Diffstat (limited to 'src/util.py')
-rw-r--r-- | src/util.py | 67 |
1 files changed, 67 insertions, 0 deletions
import re
from collections import defaultdict

# Separator between a token and its arity label, e.g. ``city@1``.
ARITY_SEP = '@'
# Arity label given to tokens that came from a quoted string literal.
ARITY_STR = 's'
# Arity label substituted on the first token when ``star_top`` is requested.
ARITY_ANY = '*'

# A token is a maximal run of characters that are not whitespace,
# parentheses or commas.
_TOKEN_RE = re.compile(r'[^\s(),]+')


def after_nth(mrl, token, n):
    """Return the tail of ``mrl`` beginning at the last character of the
    n-th occurrence of ``token``.

    Occurrences are matched literally (regex metacharacters in ``token``
    are escaped) and must be delimited by word boundaries.  Each search
    resumes after the end of the previous match, so repeated one-character
    tokens advance correctly.

    mrl   -- the string to search.
    token -- the literal text to look for.
    n     -- 1-based occurrence number; for n <= 0 ``mrl`` is returned
             unchanged.

    Raises ValueError when ``mrl`` has fewer than ``n`` occurrences.
    """
    pattern = re.compile(r'\b%s\b' % re.escape(token))
    end = None
    pos = 0
    for _ in range(n):
        match = pattern.search(mrl, pos)
        if match is None:
            raise ValueError(
                'fewer than %d occurrences of %r in %r' % (n, token, mrl))
        end = match.end()
        pos = end
    if end is None:
        return mrl
    # Keep the final character of the match; count_arguments() skips over
    # it because it is neither a parenthesis nor a comma.
    return mrl[end - 1:]


def count_arguments(s):
    """Count the top-level arguments of the first parenthesised group in ``s``.

    Scanning starts at the beginning of ``s``.  If a ``)`` or ``,`` is met
    before any ``(``, there is no argument list and 0 is returned.
    Otherwise the result is one more than the number of commas found at
    nesting depth 1 before the group closes (or before ``s`` ends, if the
    group is never closed).  Note that an empty group ``()`` therefore
    counts as one argument.
    """
    depth = 0
    commas = 0
    for c in s:
        if c == '(':
            depth += 1
        elif depth == 0:
            # Still in front of any argument list: a closing parenthesis
            # or a comma means the preceding token takes no arguments.
            if c == ')' or c == ',':
                return 0
        elif c == ')':
            depth -= 1
            if depth == 0:
                return commas + 1
        elif c == ',' and depth == 1:
            commas += 1
    # Ran off the end of the string: depth > 0 means a group was opened
    # but never closed; depth == 0 means no group was ever opened.
    return commas + 1 if depth > 0 else 0


def fun_to_mrl(mrl, star_top=False):
    """Flatten a functional expression into space-separated, arity-labelled
    tokens.

    Quoted strings such as ``'new york'`` become a single token
    ``new_york@s``.  Every other token is suffixed with ``@<k>`` where
    ``k`` is the argument count reported by count_arguments() for the text
    that follows that token.  For example
    ``answer(cityid('austin', _))`` becomes
    ``answer@1 cityid@2 austin@s _@0``.

    mrl      -- the expression to convert.
    star_top -- when true, the first token's label is replaced by
                ARITY_ANY (ignored if the input contains no tokens).

    Returns the labelled tokens joined by single spaces.
    """
    mrl = mrl.strip()

    # Collapse each quoted literal into one string-typed token.
    mrl = re.sub(
        r"' *([A-Za-z0-9_ ]+?) *'",
        lambda x: '%s%s%s' % (x.group(1).replace(' ', '_'),
                              ARITY_SEP, ARITY_STR),
        mrl)
    mrl = re.sub(r'\s+', ' ', mrl)

    string_suffix = '%s%s' % (ARITY_SEP, ARITY_STR)
    labeled = []
    # Walk the tokens by position rather than re-finding each one by
    # text search, so repeated tokens and tokens containing punctuation
    # are always paired with the text that really follows them.
    for match in _TOKEN_RE.finditer(mrl):
        token = match.group()
        if token.endswith(string_suffix):
            labeled.append(token)
        else:
            arity = count_arguments(mrl[match.end():])
            labeled.append('%s%s%d' % (token, ARITY_SEP, arity))

    if star_top and labeled:
        head = labeled[0]
        sep = head.rindex(ARITY_SEP)
        labeled[0] = head[:sep] + ARITY_SEP + ARITY_ANY

    return ' '.join(labeled)