summary | refs | log | tree | commit | diff
path: root/gi/pyp-topics/scripts
diff options
context:
space:
mode:
author: trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-07 14:13:23 +0000
committer: trevor.cohn <trevor.cohn@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-07-07 14:13:23 +0000
commit: 3fc163ac91c04c90962561cae6028d7bcf72f5f2 (patch)
tree: 85c67a2b7fec735a686eb05e05e088f3ad6d29b6 /gi/pyp-topics/scripts
parent: 58e5949176bfbd66ab1eefa8ba82d729a36a5f70 (diff)
Made a bit more flexible
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@175 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/scripts')
-rwxr-xr-x gi/pyp-topics/scripts/spans2labels.py | 10
1 files changed, 8 insertions, 2 deletions
diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
index c6f131b6..0560af39 100755
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ b/gi/pyp-topics/scripts/spans2labels.py
@@ -14,13 +14,19 @@ if len(sys.argv) > 2:
phrase_context_index = {}
for line in file(sys.argv[1], 'r'):
phrase,tail= line.split('\t')
- contexts = tail.split(" ||| ")[1:]
+ contexts = tail.split(" ||| ")
+ try: # remove Phil's bizarre integer pair
+ x,y = contexts[0].split()
+ x=int(x); y=int(y)
+ contexts = contexts[1:]
+ except:
+ pass
if len(contexts) == 1: continue
assert len(contexts) % 2 == 0
for i in range(0, len(contexts), 2):
category = contexts[i+1].split("=")[1].strip()
phrase_context_index[(phrase,contexts[i])] = category
-# print (phrase,contexts[i]), category
+ #print (phrase,contexts[i]), category
for line in sys.stdin:
line_segments = line.split('|||')