From 3fc163ac91c04c90962561cae6028d7bcf72f5f2 Mon Sep 17 00:00:00 2001
From: "trevor.cohn"
Date: Wed, 7 Jul 2010 14:13:23 +0000
Subject: Made a bit more flexible

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@175 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 gi/pyp-topics/scripts/spans2labels.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'gi/pyp-topics/scripts')

diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
index c6f131b6..0560af39 100755
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ b/gi/pyp-topics/scripts/spans2labels.py
@@ -14,13 +14,19 @@ if len(sys.argv) > 2:
 phrase_context_index = {}
 for line in file(sys.argv[1], 'r'):
     phrase,tail= line.split('\t')
-    contexts = tail.split(" ||| ")[1:]
+    contexts = tail.split(" ||| ")
+    try: # remove Phil's bizarre integer pair
+        x,y = contexts[0].split()
+        x=int(x); y=int(y)
+        contexts = contexts[1:]
+    except:
+        pass
     if len(contexts) == 1: continue
     assert len(contexts) % 2 == 0
     for i in range(0, len(contexts), 2):
         category = contexts[i+1].split("=")[1].strip()
         phrase_context_index[(phrase,contexts[i])] = category
-# print (phrase,contexts[i]), category
+        #print (phrase,contexts[i]), category
 
 for line in sys.stdin:
     line_segments = line.split('|||')
-- 
cgit v1.2.3