diff options
author | philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-22 20:34:00 +0000 |
---|---|---|
committer | philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> | 2010-06-22 20:34:00 +0000 |
commit | 2f2ba42a1453f4a3a08f9c1ecfc53c1b1c83d550 (patch) | |
tree | 646e81b6325280f64a72771b5eeadf5118e465a9 /gi/pyp-topics/scripts/spans2labels.py | |
parent | 2f2e36ca3060e7e9853c3d611f6cc5e112a76ddd (diff) |
Initial ci of gi dir
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@5 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/scripts/spans2labels.py')
-rwxr-xr-x | gi/pyp-topics/scripts/spans2labels.py | 46 |
1 files changed, 46 insertions, 0 deletions
#!/usr/bin/python
"""Label the target-side phrase spans of each input line.

Usage: spans2labels.py phrase_index context_index phrase_context_index

Reads lines of the form ``source ||| target ||| spans`` from stdin, where
``spans`` is a whitespace-separated list of ``s1-s2-t1-t2`` integer
quadruples.  For every span the target phrase ``target[t1:t2]`` and its
one-token left/right context are looked up in the index files, and the
label stored for that (phrase, context) pair is emitted.

One output line is written per input line: ``|||`` followed by
space-separated ``t1-t2:label`` items.
"""

import sys
from operator import itemgetter


def read_index(lines):
    """Map each stripped line to its 0-based position in *lines*.

    If the same stripped text occurs more than once, the last position wins.
    """
    return dict((line.strip(), number) for number, line in enumerate(lines))


def read_phrase_context_index(lines):
    """Parse the phrase/context label table.

    Line number ``i`` (0-based) holds whitespace-separated ``c:label``
    entries; each one is stored under the key ``(i, int(c))``.  The first
    token of every line is skipped (presumably a phrase id or count --
    the script never uses it).

    Raises ValueError if an entry does not have exactly one ``:`` or its
    context id is not an integer.
    """
    labels = {}
    for phrase_id, line in enumerate(lines):
        for entry in line.split()[1:]:
            context_id, label = entry.split(':')
            labels[(phrase_id, int(context_id))] = label
    return labels


def label_line(line, phrase_index, context_index, phrase_context_index):
    """Return the labelled-span output for one ``src ||| tgt ||| spans`` line.

    The result has no trailing newline.  It always starts with ``|||``
    (the source/target echo that would precede it is intentionally not
    produced) and is followed by one ``t1-t2:label`` item per span.

    Raises IndexError for a line with fewer than three ``|||`` fields or a
    span reaching outside the target sentence, ValueError for a malformed
    span, and KeyError when a phrase, context or (phrase, context) pair is
    missing from the indexes.
    """
    segments = line.split('|||')
    # Pad with sentence boundary markers so every phrase has a left and a
    # right context token.
    target = ['<s>'] + segments[1].split() + ['</s>']
    spans = [[int(i) for i in span.split('-')] for span in segments[2].split()]

    fields = ['|||']
    for _s1, _s2, t1, t2 in spans:
        # Shift by one to account for the leading '<s>' in `target`.
        start, end = t1 + 1, t2 + 1
        phrase = ' '.join(target[start:end])
        context = "%s <PHRASE> %s" % (target[start - 1], target[end])

        pi = phrase_index[phrase]
        ci = context_index[context]
        label = phrase_context_index[(pi, ci)]
        fields.append("%s-%s:%s" % (t1, t2, label))
    return ' '.join(fields)


def main(argv=None):
    """Run the script; return the process exit status.

    *argv* defaults to ``sys.argv``.  Returns 1 (after printing the usage
    message to stderr) when the argument count is wrong, 0 otherwise.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 4:
        # Usage goes to stderr so it never pollutes the labelled output.
        sys.stderr.write("Usage: spans2labels.py phrase_index context_index phrase_context_index\n")
        return 1

    # `with` guarantees the index files are closed once they are loaded.
    with open(argv[1], 'r') as handle:
        phrase_index = read_index(handle)
    with open(argv[2], 'r') as handle:
        context_index = read_index(handle)
    with open(argv[3], 'r') as handle:
        phrase_context_index = read_phrase_context_index(handle)

    for line in sys.stdin:
        sys.stdout.write(
            label_line(line, phrase_index, context_index, phrase_context_index) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())