summary | refs | log | tree | commit | diff
path: root/gi/pyp-topics/scripts/contexts2documents.py
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
committer Kenneth Heafield <github@kheafield.com> 2012-10-22 12:07:20 +0100
commit 5f98fe5c4f2a2090eeb9d30c030305a70a8347d1 (patch)
tree 9b6002f850e6dea1e3400c6b19bb31a9cdf3067f /gi/pyp-topics/scripts/contexts2documents.py
parent cf9994131993b40be62e90e213b1e11e6b550143 (diff)
parent 21825a09d97c2e0afd20512f306fb25fed55e529 (diff)
Merge remote branch 'upstream/master'
Conflicts: Jamroot bjam decoder/Jamfile decoder/cdec.cc dpmert/Jamfile jam-files/sanity.jam klm/lm/Jamfile klm/util/Jamfile mira/Jamfile
Diffstat (limited to 'gi/pyp-topics/scripts/contexts2documents.py')
-rwxr-xr-x gi/pyp-topics/scripts/contexts2documents.py 37
1 files changed, 0 insertions, 37 deletions
diff --git a/gi/pyp-topics/scripts/contexts2documents.py b/gi/pyp-topics/scripts/contexts2documents.py
deleted file mode 100755
index 9be4ebbb..00000000
--- a/gi/pyp-topics/scripts/contexts2documents.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/python
-
-import sys
-from operator import itemgetter
-
-if len(sys.argv) > 3:
- print "Usage: contexts2documents.py [contexts_index_out] [phrases_index_out]"
- exit(1)
-
-context_index = {}
-phrase_index = {}
-for line in sys.stdin:
- phrase, line_tail = line.split('\t')
-
- raw_contexts = line_tail.split('|||')
- contexts = [c.strip() for x,c in enumerate(raw_contexts) if x%2 == 0]
- counts = [int(c.split('=')[1].strip()) for x,c in enumerate(raw_contexts) if x%2 != 0]
- phrase_index.setdefault(phrase, len(phrase_index))
- print len(contexts),
- for context,count in zip(contexts,counts):
- c = context_index.setdefault(context, len(context_index))
- print "%d:%d" % (c,count),
- print
-if 1 < len(sys.argv) < 4:
- contexts_out = open(sys.argv[1],'w')
- contexts = context_index.items()
- contexts.sort(key = itemgetter(1))
- for context in contexts:
- print >>contexts_out, context[0]
- contexts_out.close()
-if len(sys.argv) == 3:
- phrases_out = open(sys.argv[2],'w')
- phrases = phrase_index.items()
- phrases.sort(key = itemgetter(1))
- for phrase in phrases:
- print >>phrases_out, phrase[0]
- phrases_out.close()