summary | refs | log | tree | commit | diff
path: root/sa-extract/wrap_input.py
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
committer Kenneth Heafield <github@kheafield.com> 2012-08-03 07:46:54 -0400
commit be1ab0a8937f9c5668ea5e6c31b798e87672e55e (patch)
tree a13aad60ab6cced213401bce6a38ac885ba171ba /sa-extract/wrap_input.py
parent e5d6f4ae41009c26978ecd62668501af9762b0bc (diff)
parent 9fe0219562e5db25171cce8776381600ff9a5649 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'sa-extract/wrap_input.py')
-rwxr-xr-x sa-extract/wrap_input.py 37
1 files changed, 0 insertions, 37 deletions
diff --git a/sa-extract/wrap_input.py b/sa-extract/wrap_input.py
deleted file mode 100755
index e859a4fd..00000000
--- a/sa-extract/wrap_input.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-import sys
-import codecs
-import os
-import os.path
-from xml.sax.saxutils import escape
-
-graPrefix = sys.argv[1]
-
-# Second argument can be a file with observable sentence-level features,
-# one set of features per line (parallel with source sentences). Features are space-delimited indicator features.
-obsFeatsFile = None
-if len(sys.argv) == 3:
- obsFeatsFilename = sys.argv[2]
- obsFeatsFile = open(obsFeatsFilename)
-
-sys.stdin = codecs.getreader("utf-8")(sys.stdin)
-sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
-
-i = 0
-for line in sys.stdin:
- filename = "%s%d"%(graPrefix,i)
- if not os.path.exists(filename):
- filenameGz = filename + ".gz"
- if not os.path.exists(filenameGz):
- print >>sys.stderr, "Grammar file not found: ", filename, filenameGz
- sys.exit(1)
- else:
- filename = filenameGz
-
- if obsFeatsFile:
- obsFeats = obsFeatsFile.next().strip()
- print '<seg id="%d" features="%s" grammar="%s"> '%(i,obsFeats,filename) + escape(line.strip()) + " </seg>"
- else:
- print '<seg id="%d" grammar="%s"> '%(i,filename) + escape(line.strip()) + " </seg>"
- i+=1
-