diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-08-01 17:32:37 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-08-01 17:32:37 +0200 |
commit | eb3ea4fd5dff1c94b237af792c9f7bf421d79d96 (patch) | |
tree | 2acd7674f36e6dc6e815c5856519fdea1a2d6bf8 /sa-extract/wrap_input.py | |
parent | e816274e337a066df1b1e86ef00136a021a17caf (diff) | |
parent | 193d137056c3c4f73d66f8db84691d63307de894 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'sa-extract/wrap_input.py')
-rwxr-xr-x | sa-extract/wrap_input.py | 37 |
1 files changed, 0 insertions, 37 deletions
diff --git a/sa-extract/wrap_input.py b/sa-extract/wrap_input.py
deleted file mode 100755
index e859a4fd..00000000
--- a/sa-extract/wrap_input.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-import sys
-import codecs
-import os
-import os.path
-from xml.sax.saxutils import escape
-
-graPrefix = sys.argv[1]
-
-# Second argument can be a file with observable sentence-level features,
-# one set of features per line (parallel with source sentences). Features are space-delimited indicator features.
-obsFeatsFile = None
-if len(sys.argv) == 3:
-  obsFeatsFilename = sys.argv[2]
-  obsFeatsFile = open(obsFeatsFilename)
-
-sys.stdin = codecs.getreader("utf-8")(sys.stdin)
-sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
-
-i = 0
-for line in sys.stdin:
-  filename = "%s%d"%(graPrefix,i)
-  if not os.path.exists(filename):
-    filenameGz = filename + ".gz"
-    if not os.path.exists(filenameGz):
-      print >>sys.stderr, "Grammar file not found: ", filename, filenameGz
-      sys.exit(1)
-    else:
-      filename = filenameGz
-
-  if obsFeatsFile:
-    obsFeats = obsFeatsFile.next().strip()
-    print '<seg id="%d" features="%s" grammar="%s"> '%(i,obsFeats,filename) + escape(line.strip()) + " </seg>"
-  else:
-    print '<seg id="%d" grammar="%s"> '%(i,filename) + escape(line.strip()) + " </seg>"
-  i+=1
-