src/util.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

import re
from collections import defaultdict

# Separator placed between a token and its arity label in the labeled
# output produced by fun_to_mrl (token + ARITY_SEP + label).
ARITY_SEP = '@'
# Arity label carried by tokens that came from quoted string literals.
ARITY_STR = 's'
# Arity label substituted on the first token when fun_to_mrl is called
# with star_top=True.
ARITY_ANY = '*'

def after_nth(mrl, token, n):
  """Return the tail of `mrl` beginning at the n-th occurrence of `token`.

  Occurrences are whole-word matches (delimited by regex word boundaries)
  and are counted left to right without overlapping.  The returned slice
  starts at the LAST character of the matched token, so the token's final
  character is still the first character of the result.

  mrl   -- string to search.
  token -- literal text to look for; it is escaped, so regex
           metacharacters in it are matched verbatim.
  n     -- 1-based occurrence index.  For n <= 0 `mrl` is returned unchanged.

  Raises ValueError if `token` occurs fewer than `n` times.
  """
  if n <= 0:
    return mrl
  # Escape the token: it is data, not a pattern ("a.b" must not match "axb").
  pattern = re.compile(r'\b%s\b' % re.escape(token))
  # Walk the matches over the full string instead of re-searching a
  # truncated copy: restarting at end()-1 never advances past a
  # one-character token, which made every n >= 2 return the first match.
  for occurrence, m in enumerate(pattern.finditer(mrl), 1):
    if occurrence == n:
      return mrl[m.end()-1:]
  raise ValueError('token %r occurs fewer than %d time(s) in %r' % (token, n, mrl))

def count_arguments(s):
  """Count the arguments of the first parenthesised group found in `s`.

  The string is scanned from the left.  If a ',' or ')' is met before any
  '(' has been opened, there is no argument list and 0 is returned.
  Otherwise the scan runs until the first '(' is closed again (or the
  string ends) and the result is the number of top-level commas inside
  that group plus one.  Commas nested in deeper parentheses are ignored.
  """
  depth = 0
  top_level_commas = 0
  opened = False
  for ch in s:
    if ch == '(':
      opened = True
      depth += 1
    elif ch == ')':
      depth -= 1
      # Either the group we were counting just closed (depth == 0), or a
      # ')' showed up before any '(' (depth == -1): stop in both cases.
      if depth <= 0:
        break
    elif ch == ',':
      if depth < 1:
        # Separator reached before any '(' was opened: no argument list.
        break
      if depth == 1:
        top_level_commas += 1
  if not opened:
    return 0
  return top_level_commas + 1

def fun_to_mrl(mrl, star_top=False):
  """Flatten a functional-form MRL into space-separated, arity-labeled tokens.

  Steps:
    1. Each single-quoted name made of letters, digits, underscores and
       spaces is turned into one token: inner spaces become underscores
       and ARITY_SEP + ARITY_STR is appended.
    2. Every remaining token (a maximal run of characters other than
       whitespace, parentheses and commas) gets ARITY_SEP followed by the
       number of arguments count_arguments finds after it.
    3. If `star_top` is true, the first token's label is replaced by
       ARITY_ANY.

  Example:
    "answer(population_1(cityid('austin', 'tx')))"
      -> "answer@1 population_1@1 cityid@2 austin@s tx@s"

  mrl      -- the functional-form string.
  star_top -- replace the arity label of the first token with ARITY_ANY.

  Returns the labeled tokens joined by single spaces ('' for input that
  contains no tokens).
  """
  mrl = mrl.strip()

  # 'new york' -> new_york@s : quoted names become single string tokens.
  mrl = re.sub(r"' *([A-Za-z0-9_ ]+?) *'",
               lambda x: '%s%s%s' % (x.group(1).replace(' ', '_'), ARITY_SEP, ARITY_STR),
               mrl)
  mrl = re.sub(r'\s+', ' ', mrl)

  string_suffix = '%s%s' % (ARITY_SEP, ARITY_STR)
  mrl_labeled_tokens = []
  # Tokenize in place so every token carries its own position.  Looking
  # the token up again by "n-th occurrence" with a word-boundary regex is
  # quadratic and can land on the wrong spot (e.g. inside another token's
  # "@s" suffix) or fail to match at all for tokens such as "-1".
  for m in re.finditer(r'[^\s(),]+', mrl):
    token = m.group(0)
    if token.endswith(string_suffix):
      # String tokens already carry their label.
      mrl_labeled_tokens.append(token)
    else:
      args = count_arguments(mrl[m.end():])
      mrl_labeled_tokens.append('%s%s%d' % (token, ARITY_SEP, args))

  # Guard against empty input: there is no top token to relabel.
  if star_top and mrl_labeled_tokens:
    tok = mrl_labeled_tokens[0]
    sep = tok.rindex(ARITY_SEP)
    mrl_labeled_tokens[0] = tok[:sep] + ARITY_SEP + ARITY_ANY

  return ' '.join(mrl_labeled_tokens)