diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-08-01 17:32:37 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-08-01 17:32:37 +0200 |
commit | eb3ea4fd5dff1c94b237af792c9f7bf421d79d96 (patch) | |
tree | 2acd7674f36e6dc6e815c5856519fdea1a2d6bf8 /sa-extract/extractor.py | |
parent | e816274e337a066df1b1e86ef00136a021a17caf (diff) | |
parent | 193d137056c3c4f73d66f8db84691d63307de894 (diff) |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'sa-extract/extractor.py')
-rwxr-xr-x | sa-extract/extractor.py | 60 |
1 files changed, 0 insertions, 60 deletions
diff --git a/sa-extract/extractor.py b/sa-extract/extractor.py deleted file mode 100755 index 9d66ebf0..00000000 --- a/sa-extract/extractor.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -# vim:expandtab:shiftwidth=4 - -import sys, gc, monitor, sgml -import optparse -import model -import log -import cn - -models = [] - -def add_model(m,w=0.0): - models.append(m) - -def extract_grammar(input): - confnet = cn.ConfusionNet(input) - meta = input.meta - for m in models: - m.input(confnet.columns, meta) - -if __name__ == "__main__": - optparser = optparse.OptionParser() - optparser.add_option("-c", "--config", dest="config", help="configuration module") - optparser.add_option("-x", "--extra", dest="extra", help="output grammar name override") - (opts,args) = optparser.parse_args() - - if opts.config is None: - raise ValueError, "You must specify a configuration file." - else: - if log.level >= 1: - log.write("Reading configuration from %s\n" % opts.config) - execfile(opts.config) - - if len(args) >= 1 and args[0] != "-": - input_file = file(args[0], "r") - else: - input_file = sys.stdin - - if len(args) >= 2 and args[1] != "-": - output_file = file(args[1], "w") - else: - output_file = sys.stdout - - gc.collect() - if log.level >= 1: - log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu())) - log.write("models: %s\n" % (" ".join(str(x.name) for x in models))) - - sents = sgml.read_raw(input_file) - for sent in sents: - mark = sent.getmark() - if mark is not None: - (tag, attrs) = mark - if tag == "seg": - sent.unmark() - dattrs = sgml.attrs_to_dict(attrs) - sent.meta = attrs - extract_grammar(sent) - |