summaryrefslogtreecommitdiff
path: root/gi/pyp-topics/scripts/tokens2classes.py
blob: 33df255fb5ebc63a253063c1bf6587bff981122a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/python

import sys

def load_classes(path):
    """Read a token-to-class mapping file into a dict.

    Every non-blank line of the file must consist of exactly two
    whitespace-separated fields: a token followed by its class label.
    When a token occurs more than once, the last occurrence wins.

    Args:
        path: name of the mapping file to read.

    Returns:
        A dict mapping each token to its class label (both strings).

    Raises:
        ValueError: a non-blank line does not have exactly two fields.
    """
    mapping = {}
    # The context manager closes the file even if a malformed line raises.
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            term, cls = fields
            mapping[term] = cls
    return mapping


def convert_line(line, source_to_topics, target_to_topics):
    """Replace the tokens of one ``source ||| target ||| tail`` record.

    The first two fields are split on whitespace and every token is
    replaced by its class label; everything after the second `` ||| ``
    separator is passed through unchanged, even if it contains further
    `` ||| `` separators.

    Args:
        line: one input record, normally ending in a newline.
        source_to_topics: token-to-class dict for the first field.
        target_to_topics: token-to-class dict for the second field.

    Returns:
        The converted record, always terminated by a newline.

    Raises:
        ValueError: the record has fewer than three `` ||| `` fields.
        KeyError: a token is missing from the corresponding mapping.
    """
    # maxsplit=2 keeps any additional " ||| "-separated fields in the tail
    # instead of failing on the three-way unpacking.
    source, target, tail = line.split(" ||| ", 2)

    pieces = [source_to_topics[token] for token in source.split()]
    pieces.append("|||")
    pieces.extend(target_to_topics[token] for token in target.split())
    pieces.append("|||")
    pieces.append(tail)

    # Joining all pieces with single spaces yields the same spacing as a
    # sequence of Python 2 ``print x,`` statements, including no leading
    # space when the source side is empty.
    converted = " ".join(pieces)
    if not converted.endswith("\n"):
        # Only happens when the final input line lacks a newline.
        converted += "\n"
    return converted


def main(argv=None, stdin=None, stdout=None):
    """Run the filter: map tokens read from stdin to their classes.

    Args:
        argv: argument vector (program name, source class file, target
            class file); defaults to ``sys.argv``.
        stdin: iterable of input lines; defaults to ``sys.stdin``.
        stdout: object with a ``write`` method; defaults to ``sys.stdout``.

    Returns:
        The process exit status: 0 on success, 1 on a usage error.
    """
    argv = sys.argv if argv is None else argv
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    if len(argv) != 3:
        # Usage goes to stderr so it never ends up in the data stream.
        sys.stderr.write(
            "Usage: tokens2classes.py source_classes target_classes\n")
        return 1

    source_to_topics = load_classes(argv[1])
    target_to_topics = load_classes(argv[2])

    for line in stdin:
        stdout.write(convert_line(line, source_to_topics, target_to_topics))
    return 0


if __name__ == "__main__":
    sys.exit(main())