diff options
author | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-02-26 00:03:48 -0500 |
---|---|---|
committer | Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> | 2014-02-26 00:03:48 -0500 |
commit | 1cb85d42d12bdbe21f9f258fa50fcc1c73e8cfcc (patch) | |
tree | ac2f498f686be4e532c94b3d01537757f90d38f0 /python/cdec/sa/extract.py | |
parent | 7c0ee6a2e22a1ace580ed1dcad65a4c591783135 (diff) | |
parent | 3cb43f4e3980457cbb7b749cee51a5bb777e18f8 (diff) |
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'python/cdec/sa/extract.py')
-rw-r--r-- | python/cdec/sa/extract.py | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/python/cdec/sa/extract.py b/python/cdec/sa/extract.py
index b6502c52..d8f14b11 100644
--- a/python/cdec/sa/extract.py
+++ b/python/cdec/sa/extract.py
@@ -12,12 +12,13 @@ from cdec.sa._sa import monitor_cpu
 
 extractor, prefix = None, None
 online, compress = False, False
+vocab = None
 
 def make_extractor(args):
     global extractor, prefix, online, compress
     signal.signal(signal.SIGINT, signal.SIG_IGN) # Let parent process catch Ctrl+C
     load_features(args.features)
-    extractor = cdec.sa.GrammarExtractor(args.config, online)
+    extractor = cdec.sa.GrammarExtractor(args.config, online, vocab)
     prefix = args.grammars
     online = args.online
     compress = args.compress
@@ -63,7 +64,7 @@ def extract(inp):
     return '<seg grammar="{}" id="{}">{}</seg>{}'.format(grammar_file, i, sentence, suffix)
 
 def main():
-    global online
+    global online, vocab
     logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.')
     parser.add_argument('-c', '--config', required=True,
@@ -78,6 +79,8 @@ def main():
                         help='additional feature definitions')
     parser.add_argument('-o', '--online', action='store_true',
                         help='online grammar extraction')
+    parser.add_argument('-e', '--except-vocab', default=None,
+                        help='add LM and Lex except features (use with -o, pass vocab.gz)')
     parser.add_argument('-z', '--compress', action='store_true',
                         help='compress grammars with gzip')
     args = parser.parse_args()
@@ -91,6 +94,7 @@ def main():
         sys.exit(1)
 
     online = args.online
+    vocab = args.except_vocab
 
     start_time = monitor_cpu()
     if args.jobs > 1:
```