diff options
author | Chris Dyer <redpony@gmail.com> | 2014-02-27 19:45:14 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-02-27 19:45:14 -0500 |
commit | 5675965782e2c9201a7a2fe54b542f5b06d660ef (patch) | |
tree | 2fefaf95c72f2caa72185e4579ad3b715e1cc5c4 /python/cdec/sa/extract.py | |
parent | ed56625e5edeadbe9297680b07e269c42b7ea420 (diff) | |
parent | 53f4328e5e5cc72c6d483783edb85ba16b414caf (diff) |
Merge branch 'master' of https://github.com/redpony/cdec
Diffstat (limited to 'python/cdec/sa/extract.py')
-rw-r--r-- | python/cdec/sa/extract.py | 8 |
1 file changed, 6 insertions, 2 deletions
```diff
diff --git a/python/cdec/sa/extract.py b/python/cdec/sa/extract.py
index b6502c52..d8f14b11 100644
--- a/python/cdec/sa/extract.py
+++ b/python/cdec/sa/extract.py
@@ -12,12 +12,13 @@ from cdec.sa._sa import monitor_cpu
 
 extractor, prefix = None, None
 online, compress = False, False
+vocab = None
 
 def make_extractor(args):
     global extractor, prefix, online, compress
     signal.signal(signal.SIGINT, signal.SIG_IGN) # Let parent process catch Ctrl+C
     load_features(args.features)
-    extractor = cdec.sa.GrammarExtractor(args.config, online)
+    extractor = cdec.sa.GrammarExtractor(args.config, online, vocab)
     prefix = args.grammars
     online = args.online
     compress = args.compress
@@ -63,7 +64,7 @@ def extract(inp):
     return '<seg grammar="{}" id="{}">{}</seg>{}'.format(grammar_file, i, sentence, suffix)
 
 def main():
-    global online
+    global online, vocab
     logging.basicConfig(level=logging.INFO)
     parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.')
     parser.add_argument('-c', '--config', required=True,
@@ -78,6 +79,8 @@ def main():
                         help='additional feature definitions')
     parser.add_argument('-o', '--online', action='store_true',
                         help='online grammar extraction')
+    parser.add_argument('-e', '--except-vocab', default=None,
+                        help='add LM and Lex except features (use with -o, pass vocab.gz)')
     parser.add_argument('-z', '--compress', action='store_true',
                         help='compress grammars with gzip')
     args = parser.parse_args()
@@ -91,6 +94,7 @@ def main():
         sys.exit(1)
 
     online = args.online
+    vocab = args.except_vocab
 
     start_time = monitor_cpu()
     if args.jobs > 1:
```