summary | refs | log | tree | commit | diff
path: root/python/cdec/sa/extract.py
diff options
context:
space:
mode:
author Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-02-26 00:03:48 -0500
committer Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2014-02-26 00:03:48 -0500
commit 1cb85d42d12bdbe21f9f258fa50fcc1c73e8cfcc (patch)
tree ac2f498f686be4e532c94b3d01537757f90d38f0 /python/cdec/sa/extract.py
parent 7c0ee6a2e22a1ace580ed1dcad65a4c591783135 (diff)
parent 3cb43f4e3980457cbb7b749cee51a5bb777e18f8 (diff)
Merge branch 'master' of github.com:redpony/cdec
Diffstat (limited to 'python/cdec/sa/extract.py')
-rw-r--r-- python/cdec/sa/extract.py 8
1 files changed, 6 insertions, 2 deletions
diff --git a/python/cdec/sa/extract.py b/python/cdec/sa/extract.py
index b6502c52..d8f14b11 100644
--- a/python/cdec/sa/extract.py
+++ b/python/cdec/sa/extract.py
@@ -12,12 +12,13 @@ from cdec.sa._sa import monitor_cpu
extractor, prefix = None, None
online, compress = False, False
+vocab = None
def make_extractor(args):
global extractor, prefix, online, compress
signal.signal(signal.SIGINT, signal.SIG_IGN) # Let parent process catch Ctrl+C
load_features(args.features)
- extractor = cdec.sa.GrammarExtractor(args.config, online)
+ extractor = cdec.sa.GrammarExtractor(args.config, online, vocab)
prefix = args.grammars
online = args.online
compress = args.compress
@@ -63,7 +64,7 @@ def extract(inp):
return '<seg grammar="{}" id="{}">{}</seg>{}'.format(grammar_file, i, sentence, suffix)
def main():
- global online
+ global online, vocab
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(description='Extract grammars from a compiled corpus.')
parser.add_argument('-c', '--config', required=True,
@@ -78,6 +79,8 @@ def main():
help='additional feature definitions')
parser.add_argument('-o', '--online', action='store_true',
help='online grammar extraction')
+ parser.add_argument('-e', '--except-vocab', default=None,
+ help='add LM and Lex except features (use with -o, pass vocab.gz)')
parser.add_argument('-z', '--compress', action='store_true',
help='compress grammars with gzip')
args = parser.parse_args()
@@ -91,6 +94,7 @@ def main():
sys.exit(1)
online = args.online
+ vocab = args.except_vocab
start_time = monitor_cpu()
if args.jobs > 1: