diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-10-22 12:07:20 +0100 |
commit | 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch) | |
tree | 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/pyp-topics/scripts/contexts2documents.py | |
parent | cf9994131993b40be62e90e213b1e11e6b550143 (diff) | |
parent | 21825a09d97c2e0afd20512f306fb25fed55e529 (diff) |
Merge remote branch 'upstream/master'
Conflicts:
Jamroot
bjam
decoder/Jamfile
decoder/cdec.cc
dpmert/Jamfile
jam-files/sanity.jam
klm/lm/Jamfile
klm/util/Jamfile
mira/Jamfile
Diffstat (limited to 'gi/pyp-topics/scripts/contexts2documents.py')
-rwxr-xr-x | gi/pyp-topics/scripts/contexts2documents.py | 37 |
1 files changed, 0 insertions, 37 deletions
diff --git a/gi/pyp-topics/scripts/contexts2documents.py b/gi/pyp-topics/scripts/contexts2documents.py
deleted file mode 100755
index 9be4ebbb..00000000
--- a/gi/pyp-topics/scripts/contexts2documents.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from operator import itemgetter
-
-if len(sys.argv) > 3:
-  print "Usage: contexts2documents.py [contexts_index_out] [phrases_index_out]"
-  exit(1)
-
-context_index = {}
-phrase_index = {}
-for line in sys.stdin:
-  phrase, line_tail = line.split('\t')
-
-  raw_contexts = line_tail.split('|||')
-  contexts = [c.strip() for x,c in enumerate(raw_contexts) if x%2 == 0]
-  counts = [int(c.split('=')[1].strip()) for x,c in enumerate(raw_contexts) if x%2 != 0]
-  phrase_index.setdefault(phrase, len(phrase_index))
-  print len(contexts),
-  for context,count in zip(contexts,counts):
-    c = context_index.setdefault(context, len(context_index))
-    print "%d:%d" % (c,count),
-  print
-if 1 < len(sys.argv) < 4:
-  contexts_out = open(sys.argv[1],'w')
-  contexts = context_index.items()
-  contexts.sort(key = itemgetter(1))
-  for context in contexts:
-    print >>contexts_out, context[0]
-  contexts_out.close()
-if len(sys.argv) == 3:
-  phrases_out = open(sys.argv[2],'w')
-  phrases = phrase_index.items()
-  phrases.sort(key = itemgetter(1))
-  for phrase in phrases:
-    print >>phrases_out, phrase[0]
-  phrases_out.close()