summary | refs | log | tree | commit | diff
path: root/python/cdec/sa/compile.py
diff options
context:
space:
mode:
author Michael Denkowski <mdenkows@cs.cmu.edu> 2014-03-06 15:35:10 -0800
committer Michael Denkowski <mdenkows@cs.cmu.edu> 2014-03-06 15:35:10 -0800
commit a49f3a5b19547e7e46a652b22fab601da8fc210f (patch)
tree e601ada49b2751344d14175b005182ca842a29cf /python/cdec/sa/compile.py
parent 18aa808143ab06da361a557350f6b3dd964717ce (diff)
Compile count-based bilex table for online grammar extraction.
Diffstat (limited to 'python/cdec/sa/compile.py')
-rw-r--r-- python/cdec/sa/compile.py 17
1 file changed, 17 insertions, 0 deletions
diff --git a/python/cdec/sa/compile.py b/python/cdec/sa/compile.py
index caa93f8b..3cdf212f 100644
--- a/python/cdec/sa/compile.py
+++ b/python/cdec/sa/compile.py
@@ -54,6 +54,8 @@ def main():
help='Bitext word alignment')
parser.add_argument('-o', '--output', required=True,
help='Output path')
+ parser.add_argument('--online', action='store_true',
+ help='Compile data for online grammar extraction')
args = parser.parse_args()
if not ((args.source and args.target) or args.bitext):
@@ -74,6 +76,8 @@ def main():
precomp_bin = os.path.join(args.output, precomp_file)
a_bin = os.path.join(args.output, 'a.bin')
lex_bin = os.path.join(args.output, 'lex.bin')
+ # online only
+ bilex_file = os.path.join(args.output, 'bilex.gz')
config = cdec.configobj.ConfigObj(args.config, unrepr=True)
config['f_sa_file'] = os.path.abspath(f_sa_bin)
@@ -81,6 +85,8 @@ def main():
config['a_file'] = os.path.abspath(a_bin)
config['lex_file'] = os.path.abspath(lex_bin)
config['precompute_file'] = os.path.abspath(precomp_bin)
+ if args.online:
+ config['bilex_file'] = os.path.abspath(bilex_file)
start_time = monitor_cpu()
logger.info('Compiling source suffix array')
@@ -122,6 +128,17 @@ def main():
stop_time = monitor_cpu()
logger.info('Compiling bilexical dictionary took %f seconds', stop_time - start_time)
+ if args.online:
+ start_time = monitor_cpu()
+ logger.info('Compiling online bilexical dictionary')
+ if args.bitext:
+ bilex = cdec.sa.online.Bilex(alignment_f=args.alignment, text_f=args.bitext)
+ else:
+ bilex = cdec.sa.online.Bilex(alignment_f=args.alignment, text_f=args.source, text_target_f=args.target)
+ bilex.write(bilex_file)
+ stop_time = monitor_cpu()
+ logger.info('Compiling online bilexical dictionary took %f seconds', stop_time - start_time)
+
# Write configuration
for name, value in zip(param_names, params):
config[name] = value