summary | refs | log | tree | commit | diff
path: root/python/pkg/cdec/sa
diff options
context:
space:
mode:
Diffstat (limited to 'python/pkg/cdec/sa')
-rw-r--r-- python/pkg/cdec/sa/compile.py | 21
-rw-r--r-- python/pkg/cdec/sa/extract.py | 7
2 files changed, 26 insertions, 2 deletions
diff --git a/python/pkg/cdec/sa/compile.py b/python/pkg/cdec/sa/compile.py
index ce249c0f..d4cd8387 100644
--- a/python/pkg/cdec/sa/compile.py
+++ b/python/pkg/cdec/sa/compile.py
@@ -4,6 +4,7 @@ import os
import logging
import cdec.configobj
import cdec.sa
+from cdec.sa._sa import monitor_cpu
import sys
MAX_PHRASE_LENGTH = 4
@@ -21,6 +22,7 @@ def precompute(f_sa, max_len, max_nt, max_size, min_gap, rank1, rank2, tight_phr
return precomp
def main():
+ preprocess_start_time = monitor_cpu()
sys.setrecursionlimit(sys.getrecursionlimit() * 100)
logging.basicConfig(level=logging.INFO)
@@ -73,31 +75,46 @@ def main():
a_bin = os.path.join(args.output, 'a.bin')
lex_bin = os.path.join(args.output, 'lex.bin')
+ start_time = monitor_cpu()
logger.info('Compiling source suffix array')
if args.bitext:
f_sa = cdec.sa.SuffixArray(from_text=args.bitext, side='source')
else:
f_sa = cdec.sa.SuffixArray(from_text=args.source)
f_sa.write_binary(f_sa_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling source suffix array took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Compiling target data array')
if args.bitext:
e = cdec.sa.DataArray(from_text=args.bitext, side='target')
else:
e = cdec.sa.DataArray(from_text=args.target)
e.write_binary(e_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling target data array took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Precomputing frequent phrases')
precompute(f_sa, *params).write_binary(precomp_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling precomputations took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Compiling alignment')
a = cdec.sa.Alignment(from_text=args.alignment)
a.write_binary(a_bin)
+ stop_time = monitor_cpu()
+ logger.info('Compiling alignment took %f seconds', stop_time - start_time)
+ start_time = monitor_cpu()
logger.info('Compiling bilexical dictionary')
lex = cdec.sa.BiLex(from_data=True, alignment=a, earray=e, fsarray=f_sa)
lex.write_binary(lex_bin)
-
+ stop_time = monitor_cpu()
+ logger.info('Compiling bilexical dictionary took %f seconds', stop_time - start_time)
+
# Write configuration
config = cdec.configobj.ConfigObj(args.config, unrepr=True)
config['f_sa_file'] = os.path.abspath(f_sa_bin)
@@ -108,6 +125,8 @@ def main():
for name, value in zip(param_names, params):
config[name] = value
config.write()
+ preprocess_stop_time = monitor_cpu()
+ logger.info('Overall preprocessing step took %f seconds', preprocess_stop_time - preprocess_start_time)
if __name__ == '__main__':
main()
diff --git a/python/pkg/cdec/sa/extract.py b/python/pkg/cdec/sa/extract.py
index b7d2fe6e..87b7d5d4 100644
--- a/python/pkg/cdec/sa/extract.py
+++ b/python/pkg/cdec/sa/extract.py
@@ -7,6 +7,7 @@ import re
import multiprocessing as mp
import signal
import cdec.sa
+from cdec.sa._sa import monitor_cpu
extractor, prefix = None, None
def make_extractor(config, grammars, features):
@@ -62,7 +63,8 @@ def main():
sys.stderr.write('Error: feature definition file <{0}>'
' should be a python module\n'.format(featdef))
sys.exit(1)
-
+
+ start_time = monitor_cpu()
if args.jobs > 1:
logging.info('Starting %d workers; chunk size: %d', args.jobs, args.chunksize)
pool = mp.Pool(args.jobs, make_extractor, (args.config, args.grammars, args.features))
@@ -76,5 +78,8 @@ def main():
for output in map(extract, enumerate(sys.stdin)):
print(output)
+ stop_time = monitor_cpu()
+ logging.info("Overall extraction step took %f seconds", stop_time - start_time)
+
if __name__ == '__main__':
main()