From a49f3a5b19547e7e46a652b22fab601da8fc210f Mon Sep 17 00:00:00 2001 From: Michael Denkowski Date: Thu, 6 Mar 2014 15:35:10 -0800 Subject: Compile count-based bilex table for online grammar extraction. --- python/cdec/sa/compile.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'python/cdec/sa/compile.py') diff --git a/python/cdec/sa/compile.py b/python/cdec/sa/compile.py index caa93f8b..3cdf212f 100644 --- a/python/cdec/sa/compile.py +++ b/python/cdec/sa/compile.py @@ -54,6 +54,8 @@ def main(): help='Bitext word alignment') parser.add_argument('-o', '--output', required=True, help='Output path') + parser.add_argument('--online', action='store_true', + help='Compile data for online grammar extraction') args = parser.parse_args() if not ((args.source and args.target) or args.bitext): @@ -74,6 +76,8 @@ def main(): precomp_bin = os.path.join(args.output, precomp_file) a_bin = os.path.join(args.output, 'a.bin') lex_bin = os.path.join(args.output, 'lex.bin') + # online only + bilex_file = os.path.join(args.output, 'bilex.gz') config = cdec.configobj.ConfigObj(args.config, unrepr=True) config['f_sa_file'] = os.path.abspath(f_sa_bin) @@ -81,6 +85,8 @@ def main(): config['a_file'] = os.path.abspath(a_bin) config['lex_file'] = os.path.abspath(lex_bin) config['precompute_file'] = os.path.abspath(precomp_bin) + if args.online: + config['bilex_file'] = os.path.abspath(bilex_file) start_time = monitor_cpu() logger.info('Compiling source suffix array') @@ -122,6 +128,17 @@ def main(): stop_time = monitor_cpu() logger.info('Compiling bilexical dictionary took %f seconds', stop_time - start_time) + if args.online: + start_time = monitor_cpu() + logger.info('Compiling online bilexical dictionary') + if args.bitext: + bilex = cdec.sa.online.Bilex(alignment_f=args.alignment, text_f=args.bitext) + else: + bilex = cdec.sa.online.Bilex(alignment_f=args.alignment, text_f=args.source, text_target_f=args.target) + bilex.write(bilex_file) + stop_time = monitor_cpu() + logger.info('Compiling online bilexical dictionary took %f seconds', stop_time - start_time) + # Write configuration for name, value in zip(param_names, params): config[name] = value -- cgit v1.2.3