diff options
| author | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-07 14:13:23 +0000 | 
|---|---|---|
| committer | trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-07 14:13:23 +0000 | 
| commit | 3fc163ac91c04c90962561cae6028d7bcf72f5f2 (patch) | |
| tree | 85c67a2b7fec735a686eb05e05e088f3ad6d29b6 | |
| parent | 58e5949176bfbd66ab1eefa8ba82d729a36a5f70 (diff) | |
Made a bit more flexible
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@175 ec762483-ff6d-05da-a07a-a48fb63a330f
| -rwxr-xr-x | gi/pyp-topics/scripts/spans2labels.py | 10 | 
1 files changed, 8 insertions, 2 deletions
```diff
diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
index c6f131b6..0560af39 100755
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ b/gi/pyp-topics/scripts/spans2labels.py
@@ -14,13 +14,19 @@ if len(sys.argv) > 2:
 phrase_context_index = {}
 for line in file(sys.argv[1], 'r'):
   phrase,tail= line.split('\t')
-  contexts = tail.split(" ||| ")[1:]
+  contexts = tail.split(" ||| ")
+  try: # remove Phil's bizarre integer pair
+       x,y = contexts[0].split()
+       x=int(x); y=int(y)
+       contexts = contexts[1:]
+  except:
+       pass
   if len(contexts) == 1: continue
   assert len(contexts) % 2 == 0
   for i in range(0, len(contexts), 2):
     category = contexts[i+1].split("=")[1].strip()
     phrase_context_index[(phrase,contexts[i])] = category
-#   print (phrase,contexts[i]), category
+    #print (phrase,contexts[i]), category
 
 for line in sys.stdin:
   line_segments = line.split('|||')
```
