summary | refs | log | tree | commit | diff
path: root/sa-extract/extractor.py
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-08-01 17:32:37 +0200
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-08-01 17:32:37 +0200
commit 3f8e33cfe481a09c121a410e66a6074b5d05683e (patch)
tree a41ecaf0bbb69fa91a581623abe89d41219c04f8 /sa-extract/extractor.py
parent c139ce495861bb341e1b86a85ad4559f9ad53c14 (diff)
parent 9fe0219562e5db25171cce8776381600ff9a5649 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'sa-extract/extractor.py')
-rwxr-xr-x sa-extract/extractor.py 60
1 files changed, 0 insertions, 60 deletions
diff --git a/sa-extract/extractor.py b/sa-extract/extractor.py
deleted file mode 100755
index 9d66ebf0..00000000
--- a/sa-extract/extractor.py
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-
-# vim:expandtab:shiftwidth=4
-
-import sys, gc, monitor, sgml
-import optparse
-import model
-import log
-import cn
-
-models = []
-
-def add_model(m,w=0.0):
- models.append(m)
-
-def extract_grammar(input):
- confnet = cn.ConfusionNet(input)
- meta = input.meta
- for m in models:
- m.input(confnet.columns, meta)
-
-if __name__ == "__main__":
- optparser = optparse.OptionParser()
- optparser.add_option("-c", "--config", dest="config", help="configuration module")
- optparser.add_option("-x", "--extra", dest="extra", help="output grammar name override")
- (opts,args) = optparser.parse_args()
-
- if opts.config is None:
- raise ValueError, "You must specify a configuration file."
- else:
- if log.level >= 1:
- log.write("Reading configuration from %s\n" % opts.config)
- execfile(opts.config)
-
- if len(args) >= 1 and args[0] != "-":
- input_file = file(args[0], "r")
- else:
- input_file = sys.stdin
-
- if len(args) >= 2 and args[1] != "-":
- output_file = file(args[1], "w")
- else:
- output_file = sys.stdout
-
- gc.collect()
- if log.level >= 1:
- log.write("all structures loaded, memory %s, time %s\n" % (monitor.memory(), monitor.cpu()))
- log.write("models: %s\n" % (" ".join(str(x.name) for x in models)))
-
- sents = sgml.read_raw(input_file)
- for sent in sents:
- mark = sent.getmark()
- if mark is not None:
- (tag, attrs) = mark
- if tag == "seg":
- sent.unmark()
- dattrs = sgml.attrs_to_dict(attrs)
- sent.meta = attrs
- extract_grammar(sent)
-