From b317e0efd2398d75d70e027bb1e2cf442e683981 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Fri, 27 Jul 2012 17:31:00 -0400 Subject: remove old suffix array extractor (use the one in python/ instead) --- sa-extract/extractor.py | 60 ------------------------------------------------- 1 file changed, 60 deletions(-) delete mode 100755 sa-extract/extractor.py (limited to 'sa-extract/extractor.py') diff --git a/sa-extract/extractor.py b/sa-extract/extractor.py deleted file mode 100755 index 9d66ebf0..00000000 --- a/sa-extract/extractor.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -# vim:expandtab:shiftwidth=4 - -import sys, gc, monitor, sgml -import optparse -import model -import log -import cn - -models = [] - -def add_model(m,w=0.0): - models.append(m) - -def extract_grammar(input): - confnet = cn.ConfusionNet(input) - meta = input.meta - for m in models: - m.input(confnet.columns, meta) - -if __name__ == "__main__": - optparser = optparse.OptionParser() - optparser.add_option("-c", "--config", dest="config", help="configuration module") - optparser.add_option("-x", "--extra", dest="extra", help="output grammar name override") - (opts,args) = optparser.parse_args() - - if opts.config is None: - raise ValueError, "You must specify a configuration file." - else: - if log.level >= 1: - log.write("Reading configuration from %s\n" % opts.config) - execfile(opts.config) - - if len(args) >= 1 and args[0] != "-": - input_file = file(args[0], "r") - else: - input_file = sys.stdin - - if len(args) >= 2 and args[1] != "-": - output_file = file(args[1], "w") - else: - output_file = sys.stdout - - gc.collect() - if log.level >= 1: - log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu())) - log.write("models: %s\n" % (" ".join(str(x.name) for x in models))) - - sents = sgml.read_raw(input_file) - for sent in sents: - mark = sent.getmark() - if mark is not None: - (tag, attrs) = mark - if tag == "seg": - sent.unmark() - dattrs = sgml.attrs_to_dict(attrs) - sent.meta = attrs - extract_grammar(sent) - -- cgit v1.2.3