summary refs log tree commit diff
path: root/gi/pyp-topics/scripts/spans2labels.py
diff options
context:
space:
mode:
Diffstat (limited to 'gi/pyp-topics/scripts/spans2labels.py')
-rwxr-xr-x gi/pyp-topics/scripts/spans2labels.py 22
1 files changed, 12 insertions, 10 deletions
diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
index 3dc60835..f3968616 100755
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ b/gi/pyp-topics/scripts/spans2labels.py
@@ -33,15 +33,14 @@ for line in file(sys.argv[1], 'r'):
if len(contexts) == 1: continue
assert len(contexts) % 2 == 0
for i in range(0, len(contexts), 2):
- #parse contexts[i+1] = " C=1 P=0.8 "
- features = contexts[i+1].split()
- category = features[0].split("=")[1].strip()
- prob = float(features[1].split("=")[1].strip())
- if prob >= threshold:
- phrase_context_index[(phrase,contexts[i])] = category
- else:
- phrase_context_index[(phrase,contexts[i])] = cutoff_cat
-# print (phrase,contexts[i]), category, prob
+ #parse contexts[i+1] = " C=1 P=0.8 ... "
+ features=dict([ keyval.split('=') for keyval in contexts[i+1].split()])
+ category = features['C']
+ if features.has_key('P') and float(features['P']) < threshold:
+ category = cutoff_cat
+
+ phrase_context_index[(phrase,contexts[i])] = category
+# print (phrase,contexts[i]), category, prob
for line in sys.stdin:
line_segments = line.split('|||')
@@ -87,5 +86,8 @@ for line in sys.stdin:
context = contextt
label = phrase_context_index.get((phrase,context), "<UNK>")
- print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label),
+ if label != cutoff_cat: #cutoff'd spans are left unlabelled
+ print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label),
print
+
+