summary | refs | log | tree | commit | diff
path: root/gi/pyp-topics/scripts/map-documents.py
diff options
context:
space:
mode:
author: philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 20:34:00 +0000
committer: philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-22 20:34:00 +0000
commit: 2f2ba42a1453f4a3a08f9c1ecfc53c1b1c83d550 (patch)
tree: 646e81b6325280f64a72771b5eeadf5118e465a9 /gi/pyp-topics/scripts/map-documents.py
parent: 2f2e36ca3060e7e9853c3d611f6cc5e112a76ddd (diff)
Initial ci of gi dir
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@5 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi/pyp-topics/scripts/map-documents.py')
-rwxr-xr-x gi/pyp-topics/scripts/map-documents.py 20
1 files changed, 20 insertions, 0 deletions
diff --git a/gi/pyp-topics/scripts/map-documents.py b/gi/pyp-topics/scripts/map-documents.py
new file mode 100755
index 00000000..703de312
--- /dev/null
+++ b/gi/pyp-topics/scripts/map-documents.py
@@ -0,0 +1,20 @@
#!/usr/bin/python
"""Replace numeric term ids in a document stream with vocabulary terms.

Usage: map-documents.py vocab-file < documents

The vocab file is read one term per line (surrounding whitespace stripped);
a term's id is its zero-based line index.  Every line on stdin is split on
whitespace into tokens of the form ``id`` or ``id:value``.  Each id is
replaced by its term, the ``:value`` part (if any) is kept as-is, and the
mapped tokens are written to stdout separated by single spaces, one output
line per input line.
"""

import sys


def load_vocab(path):
    """Return the list of terms stored in *path*, one stripped term per line.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the vocabulary has been read.
    """
    with open(path, 'r') as handle:
        return [entry.strip() for entry in handle]


def map_token(token, terms):
    """Translate a single ``id`` or ``id:value`` token using *terms*.

    Only the first two ``:``-separated fields are used; any further fields
    are dropped, as in the original script.

    Raises:
        ValueError: if the id field is not an integer.
        IndexError: if the id is negative or beyond the end of *terms*.
    """
    elements = token.split(':')
    index = int(elements[0])
    if index < 0:
        # Plain list indexing would silently wrap around to the end of the
        # vocabulary; treat a negative id like any other out-of-range id.
        raise IndexError("negative term id in token %r" % token)
    term = terms[index]
    if len(elements) == 1:
        return term
    return "%s:%s" % (term, elements[1])


def map_line(line, terms):
    """Return *line* with every token mapped, joined by single spaces.

    The result carries no trailing newline; a blank line maps to ``""``.
    """
    return " ".join(map_token(token, terms) for token in line.split())


def main(argv):
    """Run the script with *argv* (``sys.argv`` style); return the exit code."""
    if len(argv) != 2:
        # sys.stdout.write instead of print keeps the script runnable on
        # both Python 2 and Python 3.
        sys.stdout.write("Usage: map-documents.py vocab-file\n")
        return 1

    terms = load_vocab(argv[1])
    for line in sys.stdin:
        sys.stdout.write(map_line(line, terms) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))