summary | refs | log | tree | commit | diff
path: root/sa-extract/extractor.py
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@cs.cmu.edu> 2012-07-27 17:31:00 -0400
committer: Chris Dyer <cdyer@cs.cmu.edu> 2012-07-27 17:31:00 -0400
commit: 733e1b1507d27d4f53055f740e8098f56215ab8f (patch)
tree: 002266f993275b9f1e28ae2f1153e1274183be68 /sa-extract/extractor.py
parent: 8fdc3681fb7551e7faeff9f720102cdd417ba077 (diff)
remove old suffix array extractor (use the one in python/ instead)
Diffstat (limited to 'sa-extract/extractor.py')
-rwxr-xr-x sa-extract/extractor.py 60
1 files changed, 0 insertions, 60 deletions
diff --git a/sa-extract/extractor.py b/sa-extract/extractor.py
deleted file mode 100755
index 9d66ebf0..00000000
--- a/sa-extract/extractor.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-
-# vim:expandtab:shiftwidth=4
-
-import sys, gc, monitor, sgml
-import optparse
-import model
-import log
-import cn
-
-models = []
-
-def add_model(m,w=0.0):
- models.append(m)
-
-def extract_grammar(input):
- confnet = cn.ConfusionNet(input)
- meta = input.meta
- for m in models:
- m.input(confnet.columns, meta)
-
-if __name__ == "__main__":
- optparser = optparse.OptionParser()
- optparser.add_option("-c", "--config", dest="config", help="configuration module")
- optparser.add_option("-x", "--extra", dest="extra", help="output grammar name override")
- (opts,args) = optparser.parse_args()
-
- if opts.config is None:
- raise ValueError, "You must specify a configuration file."
- else:
- if log.level >= 1:
- log.write("Reading configuration from %s\n" % opts.config)
- execfile(opts.config)
-
- if len(args) >= 1 and args[0] != "-":
- input_file = file(args[0], "r")
- else:
- input_file = sys.stdin
-
- if len(args) >= 2 and args[1] != "-":
- output_file = file(args[1], "w")
- else:
- output_file = sys.stdout
-
- gc.collect()
- if log.level >= 1:
- log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu()))
- log.write("models: %s\n" % (" ".join(str(x.name) for x in models)))
-
- sents = sgml.read_raw(input_file)
- for sent in sents:
- mark = sent.getmark()
- if mark is not None:
- (tag, attrs) = mark
- if tag == "seg":
- sent.unmark()
- dattrs = sgml.attrs_to_dict(attrs)
- sent.meta = attrs
- extract_grammar(sent)
-