#!/usr/bin/env python
import sys
import codecs
import os
import os.path
from xml.sax.saxutils import escape
def _format_segment(index, filename, line, obsFeats=None):
    """Wrap one source sentence in a <seg> element.

    index    -- zero-based sentence number, emitted as the id attribute.
    filename -- path of the per-sentence grammar file.
    line     -- raw input line; surrounding whitespace is stripped and the
                characters &, < and > are XML-escaped.
    obsFeats -- optional space-delimited indicator features for this
                sentence; when given, a features attribute is emitted.

    NOTE(review): the <seg ...> ... </seg> wrapper and its attribute names
    are reconstructed from the arguments being formatted; confirm them
    against the tool that consumes this output.  Attribute values are
    inserted verbatim (not escaped).
    """
    text = escape(line.strip())
    if obsFeats is None:
        opening = '<seg id="%d" grammar="%s"> ' % (index, filename)
    else:
        opening = '<seg id="%d" features="%s" grammar="%s"> ' % (
            index, obsFeats, filename)
    return opening + text + " </seg>"


def main(argv=None, stdin=None, stdout=None):
    """Annotate each input sentence with its per-sentence grammar file.

    Usage: GRAMMAR_PREFIX [OBS_FEATS_FILE] < sentences

    For the i-th input line (counting from 0) the grammar file is
    GRAMMAR_PREFIX + str(i), or that name with ".gz" appended if the plain
    file does not exist.  The optional second argument names a file with
    observable sentence-level features, one set of features per line
    (parallel with the source sentences); features are space-delimited
    indicator features.

    argv   -- argument list including the program name (default: sys.argv).
    stdin  -- text stream of sentences (default: sys.stdin decoded as UTF-8).
    stdout -- text stream for the output (default: sys.stdout as UTF-8).

    Returns the process exit status: 0 on success, 1 on a usage error, a
    missing grammar file, or a feature file shorter than the input.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        prog = argv[0] if argv else sys.argv[0]
        sys.stderr.write(
            "Usage: %s GRAMMAR_PREFIX [OBS_FEATS_FILE] < sentences\n" % prog)
        return 1
    graPrefix = argv[1]

    # Python 3 exposes the underlying byte stream as .buffer; on Python 2
    # the standard streams already carry bytes.
    if stdin is None:
        stdin = codecs.getreader("utf-8")(
            getattr(sys.stdin, "buffer", sys.stdin))
    if stdout is None:
        stdout = codecs.getwriter("utf-8")(
            getattr(sys.stdout, "buffer", sys.stdout))

    obsFeatsFile = None
    if len(argv) == 3:
        # Read bytes and decode per line so that lines are split on "\n"
        # only and non-ASCII features mix safely with the decoded sentences.
        obsFeatsFile = open(argv[2], "rb")

    try:
        for i, line in enumerate(stdin):
            filename = "%s%d" % (graPrefix, i)
            if not os.path.exists(filename):
                filenameGz = filename + ".gz"
                if not os.path.exists(filenameGz):
                    sys.stderr.write("Grammar file not found:  %s %s\n"
                                     % (filename, filenameGz))
                    return 1
                filename = filenameGz

            obsFeats = None
            if obsFeatsFile is not None:
                try:
                    obsFeats = next(obsFeatsFile).decode("utf-8").strip()
                except StopIteration:
                    sys.stderr.write(
                        "Feature file %s has no line for sentence %d\n"
                        % (argv[2], i))
                    return 1

            stdout.write(_format_segment(i, filename, line, obsFeats) + "\n")
    finally:
        if obsFeatsFile is not None:
            obsFeatsFile.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())