diff options
| author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-08-01 17:32:37 +0200 | 
|---|---|---|
| committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-08-01 17:32:37 +0200 | 
| commit | 3f8e33cfe481a09c121a410e66a6074b5d05683e (patch) | |
| tree | a41ecaf0bbb69fa91a581623abe89d41219c04f8 /sa-extract/extractor.py | |
| parent | c139ce495861bb341e1b86a85ad4559f9ad53c14 (diff) | |
| parent | 9fe0219562e5db25171cce8776381600ff9a5649 (diff) | |
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'sa-extract/extractor.py')
| -rwxr-xr-x | sa-extract/extractor.py | 60 | 
1 files changed, 0 insertions, 60 deletions
```diff
diff --git a/sa-extract/extractor.py b/sa-extract/extractor.py
deleted file mode 100755
index 9d66ebf0..00000000
--- a/sa-extract/extractor.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-
-# vim:expandtab:shiftwidth=4
-
-import sys, gc, monitor, sgml
-import optparse
-import model
-import log
-import cn
-
-models = []
-
-def add_model(m,w=0.0):
-    models.append(m)
-
-def extract_grammar(input):
-    confnet = cn.ConfusionNet(input)
-    meta = input.meta
-    for m in models:
-        m.input(confnet.columns, meta)
-
-if __name__ == "__main__":
-    optparser = optparse.OptionParser()
-    optparser.add_option("-c", "--config", dest="config", help="configuration module")
-    optparser.add_option("-x", "--extra", dest="extra", help="output grammar name override")
-    (opts,args) = optparser.parse_args()
-
-    if opts.config is None:
-        raise ValueError, "You must specify a configuration file."
-    else:
-        if log.level >= 1:
-            log.write("Reading configuration from %s\n" % opts.config)
-        execfile(opts.config)
-
-    if len(args) >= 1 and args[0] != "-":
-        input_file = file(args[0], "r")
-    else:
-        input_file = sys.stdin
-
-    if len(args) >= 2 and args[1] != "-":
-        output_file = file(args[1], "w")
-    else:
-        output_file = sys.stdout
-
-    gc.collect()
-    if log.level >= 1:
-        log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu()))
-        log.write("models: %s\n" % (" ".join(str(x.name) for x in models)))
-
-    sents = sgml.read_raw(input_file)
-    for sent in sents:
-        mark = sent.getmark()
-        if mark is not None:
-            (tag, attrs) = mark
-            if tag == "seg":
-                sent.unmark()
-                dattrs = sgml.attrs_to_dict(attrs)
-                sent.meta = attrs
-        extract_grammar(sent)
-
```
