diff options
| author | bothameister <bothameister@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 20:51:42 +0000 | 
|---|---|---|
| committer | bothameister <bothameister@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-07-15 20:51:42 +0000 | 
| commit | 88314a9633e323db207c2fe70cf846bb5c7c4438 (patch) | |
| tree | 68f16f2559be55a1b62fb088dfd0b21133b26766 /gi | |
| parent | 6c6dffdb18b6b0090f47ee72748a19005d4a55a1 (diff) | |
generalised parsing of 'features' (in clustering output) during span labelling
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@269 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi')
| -rwxr-xr-x | gi/pyp-topics/scripts/spans2labels.py | 22 | 
1 files changed, 12 insertions, 10 deletions
```diff
diff --git a/gi/pyp-topics/scripts/spans2labels.py b/gi/pyp-topics/scripts/spans2labels.py
index 3dc60835..f3968616 100755
--- a/gi/pyp-topics/scripts/spans2labels.py
+++ b/gi/pyp-topics/scripts/spans2labels.py
@@ -33,15 +33,14 @@ for line in file(sys.argv[1], 'r'):
   if len(contexts) == 1: continue
   assert len(contexts) % 2 == 0
   for i in range(0, len(contexts), 2):
-    #parse contexts[i+1] = " C=1 P=0.8 "
-    features = contexts[i+1].split()
-    category = features[0].split("=")[1].strip()
-    prob = float(features[1].split("=")[1].strip())
-    if prob >= threshold:
-      phrase_context_index[(phrase,contexts[i])] = category
-    else:
-      phrase_context_index[(phrase,contexts[i])] = cutoff_cat
-#      print (phrase,contexts[i]), category, prob
+    #parse contexts[i+1] = " C=1 P=0.8 ... "
+    features=dict([ keyval.split('=') for keyval in contexts[i+1].split()])
+    category = features['C']
+    if features.has_key('P') and float(features['P']) < threshold:
+        category = cutoff_cat
+
+    phrase_context_index[(phrase,contexts[i])] = category
+#   print (phrase,contexts[i]), category, prob
 
 for line in sys.stdin:
   line_segments = line.split('|||')
@@ -87,5 +86,8 @@ for line in sys.stdin:
         context = contextt
 
     label = phrase_context_index.get((phrase,context), "<UNK>")
-    print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label),
+    if label != cutoff_cat: #cutoff'd spans are left unlabelled
+      print "%d-%d-%d-%d:X%s" % (s1-order,s2-order,t1-order,t2-order,label),
   print
+
+
```
