summary refs log tree commit diff
path: root/python/cdec/sa/compile.py
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2014-03-16 17:48:48 +0100
committer Patrick Simianer <p@simianer.de> 2014-03-16 17:48:48 +0100
commit 5250fd67a4b8f242068cff87f0a6a4211f8b0fcf (patch)
tree f1401c1fd3eeae8671e59baf0d2169d1eb721cb7 /python/cdec/sa/compile.py
parent 3eedf96b5a08b3e3414888d328c505814b84d8db (diff)
parent cc87bfed0697583b7c11243913254dde3c0047d4 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'python/cdec/sa/compile.py')
-rw-r--r-- python/cdec/sa/compile.py 19
1 files changed, 19 insertions, 0 deletions
diff --git a/python/cdec/sa/compile.py b/python/cdec/sa/compile.py
index caa93f8b..a5bd0699 100644
--- a/python/cdec/sa/compile.py
+++ b/python/cdec/sa/compile.py
@@ -54,6 +54,8 @@ def main():
help='Bitext word alignment')
parser.add_argument('-o', '--output', required=True,
help='Output path')
+ parser.add_argument('--online', action='store_true',
+ help='Compile data for online grammar extraction')
args = parser.parse_args()
if not ((args.source and args.target) or args.bitext):
@@ -74,6 +76,8 @@ def main():
precomp_bin = os.path.join(args.output, precomp_file)
a_bin = os.path.join(args.output, 'a.bin')
lex_bin = os.path.join(args.output, 'lex.bin')
+ # online only
+ bilex_file = os.path.join(args.output, 'bilex.gz')
config = cdec.configobj.ConfigObj(args.config, unrepr=True)
config['f_sa_file'] = os.path.abspath(f_sa_bin)
@@ -81,6 +85,8 @@ def main():
config['a_file'] = os.path.abspath(a_bin)
config['lex_file'] = os.path.abspath(lex_bin)
config['precompute_file'] = os.path.abspath(precomp_bin)
+ if args.online:
+ config['bilex_file'] = os.path.abspath(bilex_file)
start_time = monitor_cpu()
logger.info('Compiling source suffix array')
@@ -122,6 +128,19 @@ def main():
stop_time = monitor_cpu()
logger.info('Compiling bilexical dictionary took %f seconds', stop_time - start_time)
+ if args.online:
+ start_time = monitor_cpu()
+ logger.info('Compiling online bilexical dictionary')
+ if args.bitext:
+ bilex = cdec.sa.online.Bilex()
+ bilex.add_bitext(args.alignment, args.bitext)
+ else:
+ bilex = cdec.sa.online.Bilex()
+ bilex.add_bitext(args.alignment, args.source, args.target)
+ bilex.write(bilex_file)
+ stop_time = monitor_cpu()
+ logger.info('Compiling online bilexical dictionary took %f seconds', stop_time - start_time)
+
# Write configuration
for name, value in zip(param_names, params):
config[name] = value