diff options
author | Chris Dyer <cdyer@cs.cmu.edu> | 2012-07-27 17:31:00 -0400 |
---|---|---|
committer | Chris Dyer <cdyer@cs.cmu.edu> | 2012-07-27 17:31:00 -0400 |
commit | b317e0efd2398d75d70e027bb1e2cf442e683981 (patch) | |
tree | ec34aff0ce4e8fb9704d1cd2b7abf00cb9a25b9a /sa-extract/wrap_input.py | |
parent | b2a8bccb2bd713d9ec081cf3dad0162c2cb492d8 (diff) |
remove old suffix array extractor (use the one in python/ instead)
Diffstat (limited to 'sa-extract/wrap_input.py')
-rwxr-xr-x | sa-extract/wrap_input.py | 37 |
1 files changed, 0 insertions, 37 deletions
```diff
diff --git a/sa-extract/wrap_input.py b/sa-extract/wrap_input.py
deleted file mode 100755
index e859a4fd..00000000
--- a/sa-extract/wrap_input.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-import sys
-import codecs
-import os
-import os.path
-from xml.sax.saxutils import escape
-
-graPrefix = sys.argv[1]
-
-# Second argument can be a file with observable sentence-level features,
-# one set of features per line (parallel with source sentences). Features are space-delimited indicator features.
-obsFeatsFile = None
-if len(sys.argv) == 3:
-  obsFeatsFilename = sys.argv[2]
-  obsFeatsFile = open(obsFeatsFilename)
-
-sys.stdin = codecs.getreader("utf-8")(sys.stdin)
-sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
-
-i = 0
-for line in sys.stdin:
-  filename = "%s%d"%(graPrefix,i)
-  if not os.path.exists(filename):
-    filenameGz = filename + ".gz"
-    if not os.path.exists(filenameGz):
-      print >>sys.stderr, "Grammar file not found: ", filename, filenameGz
-      sys.exit(1)
-    else:
-      filename = filenameGz
-
-  if obsFeatsFile:
-    obsFeats = obsFeatsFile.next().strip()
-    print '<seg id="%d" features="%s" grammar="%s"> '%(i,obsFeats,filename) + escape(line.strip()) + " </seg>"
-  else:
-    print '<seg id="%d" grammar="%s"> '%(i,filename) + escape(line.strip()) + " </seg>"
-  i+=1
-
```