summaryrefslogtreecommitdiff
path: root/gi/pyp-topics/scripts/map-documents.py
blob: 703de312ba0a55b73d638e13dbad70e632589cc1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/python

import sys

def load_vocab(path):
    """Read the vocabulary file and return its terms as a list.

    Each line of the file is one term, stripped of surrounding whitespace.
    The zero-based line number is the term's id, so ``terms[i]`` is the term
    for id ``i``.
    """
    # A context manager closes the file promptly, and a real list (rather
    # than ``map``) stays indexable on Python 3 as well as Python 2.
    with open(path, 'r') as vocab_file:
        return [line.strip() for line in vocab_file]


def map_token(token, term_dict):
    """Translate one ``id`` or ``id:suffix`` token to ``term`` / ``term:suffix``.

    Raises ValueError if the id part is not an integer and IndexError if it
    is outside the vocabulary.
    """
    elements = token.split(':')
    term = term_dict[int(elements[0])]
    if len(elements) == 1:
        return "%s" % term
    # Only the first suffix field is kept; anything after a second ':' is
    # dropped, matching the script's historical output.
    return "%s:%s" % (term, elements[1])


def map_line(line, term_dict):
    """Map every whitespace-separated token of *line*, joined by single spaces.

    The result carries no trailing newline; a blank line maps to ``""``.
    """
    return " ".join(map_token(token, term_dict) for token in line.split())


def main(argv=None):
    """Filter stdin to stdout, replacing term ids using the vocab file in argv[1].

    Returns the process exit status: 1 on a usage error, 0 otherwise.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        # Usage goes to stderr so it never ends up in piped output.
        sys.stderr.write("Usage: map-documents.py vocab-file\n")
        return 1

    term_dict = load_vocab(argv[1])
    for line in sys.stdin:
        sys.stdout.write(map_line(line, term_dict) + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())