From 88314a9633e323db207c2fe70cf846bb5c7c4438 Mon Sep 17 00:00:00 2001 From: bothameister Date: Thu, 15 Jul 2010 20:51:42 +0000 Subject: generalised parsing of 'features' (in clustering output) during span labelling git-svn-id: https://ws10smt.googlecode.com/svn/trunk@269 ec762483-ff6d-05da-a07a-a48fb63a330f --- gi/pyp-topics/scripts/spans2labels.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'gi/pyp-topics') diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py index 3dc60835..f3968616 100755 --- a/gi/pyp-topics/scripts/spans2labels.py +++ b/gi/pyp-topics/scripts/spans2labels.py @@ -33,15 +33,14 @@ for line in file(sys.argv[1], 'r'): if len(contexts) == 1: continue assert len(contexts) % 2 == 0 for i in range(0, len(contexts), 2): - #parse contexts[i+1] = " C=1 P=0.8 " - features = contexts[i+1].split() - category = features[0].split("=")[1].strip() - prob = float(features[1].split("=")[1].strip()) - if prob >= threshold: - phrase_context_index[(phrase,contexts[i])] = category - else: - phrase_context_index[(phrase,contexts[i])] = cutoff_cat -# print (phrase,contexts[i]), category, prob + #parse contexts[i+1] = " C=1 P=0.8 ... " + features=dict([ keyval.split('=') for keyval in contexts[i+1].split()]) + category = features['C'] + if features.has_key('P') and float(features['P']) < threshold: + category = cutoff_cat + + phrase_context_index[(phrase,contexts[i])] = category +# print (phrase,contexts[i]), category, prob for line in sys.stdin: line_segments = line.split('|||') @@ -87,5 +86,8 @@ for line in sys.stdin: context = contextt label = phrase_context_index.get((phrase,context), "") - print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label), + if label != cutoff_cat: #cutoff'd spans are left unlabelled + print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label), print + + -- cgit v1.2.3