summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec/sa/compile.py
diff options
context:
space:
mode:
author: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
committer: Chris Dyer <cdyer@allegro.clab.cs.cmu.edu> 2013-04-23 19:35:18 -0400
commit: 6d347f1ce078dede3da0e1498f75e357351c6543 (patch)
tree: 8e872b8747c530e741e55e25e9917c1bd8b32c5b /python/pkg/cdec/sa/compile.py
parent: d11b76def6899790161c47a73018146311356d8b (diff)
parent: 5e9605b65202f4e5fc59843b197d88c4774f0ac8 (diff)
merge paul's extractor code
Diffstat (limited to 'python/pkg/cdec/sa/compile.py')
-rw-r--r-- python/pkg/cdec/sa/compile.py 21
1 files changed, 20 insertions, 1 deletions
diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py
index ce249c0f..d4cd8387 100644
--- a/python/pkg/cdec/sa/compile.py
+++ b/python/pkg/cdec/sa/compile.py
@@ -4,6 +4,7 @@ import os
import logging
import cdec.configobj
import cdec.sa
+from cdec.sa._sa import monitor_cpu
import sys
MAX_PHRASE_LENGTH = 4
@@ -21,6 +22,7 @@ def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2, tight_phr
return precomp
def main():
+ preprocess_start_time = monitor_cpu()
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
logging.basicConfig(level=logging.INFO)
@@ -73,31 +75,46 @@ def main():
a_bin = os.path.join(args.output, 'a.bin')
lex_bin = os.path.join(args.output, 'lex.bin')
+ start_time = monitor_cpu()
logger.info('Compiling source suffix array')
if args.bitext:
f_sa = cdec.sa.SuffixArray(from_text=args.bitext, side='source')
else:
f_sa = cdec.sa.SuffixArray(from_text=args.source)
f_sa.write_binary(f_sa_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling source suffix array took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Compiling target data array')
if args.bitext:
e = cdec.sa.DataArray(from_text=args.bitext, side='target')
else:
e = cdec.sa.DataArray(from_text=args.target)
e.write_binary(e_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling target data array took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Precomputing frequent phrases')
precompute(f_sa, *params).write_binary(precomp_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling precomputations took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Compiling alignment')
a = cdec.sa.Alignment(from_text=args.alignment)
a.write_binary(a_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling alignment took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Compiling bilexical dictionary')
lex = cdec.sa.BiLex(from_data=True, alignment=a, earray=e, fsarray=f_sa)
lex.write_binary(lex_bin)
-
+ stop_time = monitor_cpu()
+ logger.info('Compiling bilexical dictionary took %f seconds', stop_time - start_time)
+
# Write configuration
config = cdec.configobj.ConfigObj(args.config, unrepr=True)
config['f_sa_file'] = os.path.abspath(f_sa_bin)
@@ -108,6 +125,8 @@ def main():
for name, value in zip(param_names, params):
config[name] = value
config.write()
+ preprocess_stop_time = monitor_cpu()
+ logger.info('Overall preprocessing step took %f seconds', preprocess_stop_time - preprocess_start_time)
if __name__ == '__main__':
main()