diff options
author | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2012-08-03 07:46:54 -0400 |
commit | be1ab0a8937f9c5668ea5e6c31b798e87672e55e (patch) | |
tree | a13aad60ab6cced213401bce6a38ac885ba171ba /sa-extract/extractor.py | |
parent | e5d6f4ae41009c26978ecd62668501af9762b0bc (diff) | |
parent | 9fe0219562e5db25171cce8776381600ff9a5649 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'sa-extract/extractor.py')
-rwxr-xr-x | sa-extract/extractor.py | 60 |
1 files changed, 0 insertions, 60 deletions
diff --git a/sa-extract/extractor.py b/sa-extract/extractor.py deleted file mode 100755 index 9d66ebf0..00000000 --- a/sa-extract/extractor.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python - -# vim:expandtab:shiftwidth=4 - -import sys, gc, monitor, sgml -import optparse -import model -import log -import cn - -models = [] - -def add_model(m,w=0.0): - models.append(m) - -def extract_grammar(input): - confnet = cn.ConfusionNet(input) - meta = input.meta - for m in models: - m.input(confnet.columns, meta) - -if __name__ == "__main__": - optparser = optparse.OptionParser() - optparser.add_option("-c", "--config", dest="config", help="configuration module") - optparser.add_option("-x", "--extra", dest="extra", help="output grammar name override") - (opts,args) = optparser.parse_args() - - if opts.config is None: - raise ValueError, "You must specify a configuration file." - else: - if log.level >= 1: - log.write("Reading configuration from %s\n" % opts.config) - execfile(opts.config) - - if len(args) >= 1 and args[0] != "-": - input_file = file(args[0], "r") - else: - input_file = sys.stdin - - if len(args) >= 2 and args[1] != "-": - output_file = file(args[1], "w") - else: - output_file = sys.stdout - - gc.collect() - if log.level >= 1: - log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu())) - log.write("models: %s\n" % (" ".join(str(x.name) for x in models))) - - sents = sgml.read_raw(input_file) - for sent in sents: - mark = sent.getmark() - if mark is not None: - (tag, attrs) = mark - if tag == "seg": - sent.unmark() - dattrs = sgml.attrs_to_dict(attrs) - sent.meta = attrs - extract_grammar(sent) - |