Better logging, save/load to default context

author: Michael Denkowski <mdenkows@cs.cmu.edu> 2013-10-01 14:08:05 -0700
committer: Michael Denkowski <mdenkows@cs.cmu.edu> 2013-10-01 14:08:05 -0700
commit: 51a83643d95ab0d7add9dd66b0b38044db10a797 (patch)
tree: 7413fc7a105c1a1f9a3d0a2fb2f25d34640d8c8c /realtime
parent: 4830491825f0c8740d505c192d60b388f64acf1b (diff)
4 files changed, 40 insertions, 29 deletions
diff --git a/realtime/realtime.py b/realtime/realtime.py
index be02d486..6ee785f8 100755
--- a/realtime/realtime.py
+++ b/realtime/realtime.py
@@ -57,6 +57,7 @@ def main():
 
     parser = Parser(description='Real-time adaptive translation with cdec.  (See README.md)')
     parser.add_argument('-c', '--config', required=True, help='Config directory')
+    parser.add_argument('-s', '--state', help='Load state file to default context (saved incremental data)')
     parser.add_argument('-n', '--normalize', help='Normalize text (tokenize, translate, detokenize)', action='store_true')
     parser.add_argument('-T', '--temp', help='Temp directory (default /tmp)', default='/tmp')
     parser.add_argument('-a', '--cache', help='Grammar cache size (default 5)', default='5')
@@ -74,6 +75,10 @@ def main():
             debug(translator, args.debug_test)
             return
 
+        # Load state if given
+        if args.state:
+            rtd.load_state(state)
+
         # Read lines and commands
         while True:
             line = sys.stdin.readline()
diff --git a/realtime/rt/aligner.py b/realtime/rt/aligner.py
index def3fcb5..bcc1ef87 100644
--- a/realtime/rt/aligner.py
+++ b/realtime/rt/aligner.py
@@ -6,6 +6,8 @@ import threading
 
 import util
 
+logger = logging.getLogger('rt.aligner')
+
 class ForceAligner:
 
     def __init__(self, fwd_params, fwd_err, rev_params, rev_err):
@@ -21,13 +23,13 @@ class ForceAligner:
         rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m, '-f', rev_params, '-r']
         tools_cmd = [atools, '-i', '-', '-j', '-', '-c', 'grow-diag-final-and']
 
-        logging.info('Executing: {}'.format(' '.join(fwd_cmd)))
+        logger.info('Executing: {}'.format(' '.join(fwd_cmd)))
         self.fwd_align = util.popen_io(fwd_cmd)
 
-        logging.info('Executing: {}'.format(' '.join(rev_cmd)))
+        logger.info('Executing: {}'.format(' '.join(rev_cmd)))
         self.rev_align = util.popen_io(rev_cmd)
 
-        logging.info('Executing: {}'.format(' '.join(tools_cmd)))
+        logger.info('Executing: {}'.format(' '.join(tools_cmd)))
         self.tools = util.popen_io(tools_cmd)
 
         # Used to guarantee thread safety
diff --git a/realtime/rt/decoder.py b/realtime/rt/decoder.py
index 1cee4610..e6e7489d 100644
--- a/realtime/rt/decoder.py
+++ b/realtime/rt/decoder.py
@@ -5,6 +5,8 @@ import threading
 
 import util
 
+logger = logging.getLogger('rt.decoder')
+
 class Decoder:
 
     def close(self, force=False):
@@ -29,7 +31,7 @@ class CdecDecoder(Decoder):
         cdec_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
         decoder = os.path.join(cdec_root, 'decoder', 'cdec')
         decoder_cmd = [decoder, '-c', config, '-w', weights]
-        logging.info('Executing: {}'.format(' '.join(decoder_cmd)))
+        logger.info('Executing: {}'.format(' '.join(decoder_cmd)))
         self.decoder = util.popen_io(decoder_cmd)
         self.lock = util.FIFOLock()
 
@@ -40,7 +42,7 @@ class MIRADecoder(Decoder):
         mira = os.path.join(cdec_root, 'training', 'mira', 'kbest_cut_mira')
         #                                              optimizer=2 step=0.001    best=500,    k=500,       uniq, stream
         mira_cmd = [mira, '-c', config, '-w', weights, '-o', '2', '-C', '0.001', '-b', '500', '-k', '500', '-u', '-t']
-        logging.info('Executing: {}'.format(' '.join(mira_cmd)))
+        logger.info('Executing: {}'.format(' '.join(mira_cmd)))
         self.decoder = util.popen_io(mira_cmd)
         self.lock = util.FIFOLock()
 
diff --git a/realtime/rt/rt.py b/realtime/rt/rt.py
index 43cc43b4..db831712 100644
--- a/realtime/rt/rt.py
+++ b/realtime/rt/rt.py
@@ -19,6 +19,8 @@ import util
 # Dummy input token that is unlikely to appear in normalized data (but no fatal errors if it does)
 LIKELY_OOV = '(OOV)'
 
+logger = logging.getLogger('rt')
+
 class RealtimeDecoder:
     '''Do not use directly unless you know what you're doing.  Use RealtimeTranslator.'''
 
@@ -48,7 +50,7 @@ class RealtimeDecoder:
         self.decoder = decoder.MIRADecoder(decoder_config_file, decoder_weights)
 
     def close(self, force=False):
-        logging.info('Closing decoder and removing {}'.format(self.tmp))
+        logger.info('Closing decoder and removing {}'.format(self.tmp))
         self.decoder.close(force)
         self.ref_fifo.close()
         shutil.rmtree(self.tmp)
@@ -75,7 +77,7 @@ class RealtimeTranslator:
         self.config = configdir
         # Temporary work dir
         self.tmp = tempfile.mkdtemp(dir=tmpdir, prefix='realtime.')
-        logging.info('Using temp dir {}'.format(self.tmp))
+        logger.info('Using temp dir {}'.format(self.tmp))
 
         # Normalization
         self.norm = norm
@@ -127,10 +129,10 @@ class RealtimeTranslator:
     def close(self, force=False):
         '''Cleanup'''
         if force:
-            logging.info('Forced shutdown: stopping immediately')
+            logger.info('Forced shutdown: stopping immediately')
         for ctx_name in list(self.ctx_names):
             self.drop_ctx(ctx_name, force)
-        logging.info('Closing processes')
+        logger.info('Closing processes')
         self.aligner.close(force)
         if self.norm:
             if not force:
@@ -141,7 +143,7 @@ class RealtimeTranslator:
             if not force:
                 self.tokenizer_lock.release()
                 self.detokenizer_lock.release()
-        logging.info('Deleting {}'.format(self.tmp))
+        logger.info('Deleting {}'.format(self.tmp))
         shutil.rmtree(self.tmp)
 
     def lazy_ctx(self, ctx_name):
@@ -149,7 +151,7 @@ class RealtimeTranslator:
         NOT threadsafe, acquire ctx_name lock before calling.'''
         if ctx_name in self.ctx_names:
             return
-        logging.info('New context: {}'.format(ctx_name))
+        logger.info('New context: {}'.format(ctx_name))
         self.ctx_names.add(ctx_name)
         self.ctx_data[ctx_name] = []
         self.grammar_files[ctx_name] = collections.deque()
@@ -164,11 +166,11 @@ class RealtimeTranslator:
         if not force:
             lock.acquire()
         if ctx_name not in self.ctx_names:
-            logging.info('No context found, no action: {}'.format(ctx_name))
+            logger.info('No context found, no action: {}'.format(ctx_name))
             if not force:
                 lock.release()
             return
-        logging.info('Dropping context: {}'.format(ctx_name))
+        logger.info('Dropping context: {}'.format(ctx_name))
         self.ctx_names.remove(ctx_name)
         self.ctx_data.pop(ctx_name)
         self.extractor.drop_ctx(ctx_name)
@@ -193,7 +195,7 @@ class RealtimeTranslator:
         grammar_file = grammar_dict.get(sentence, None)
         # Cache hit
         if grammar_file:
-            logging.info('Grammar cache hit: {}'.format(grammar_file))
+            logger.info('Grammar cache hit: {}'.format(grammar_file))
             self.extractor_lock.release()
             return grammar_file
         # Extract and cache
@@ -226,18 +228,18 @@ class RealtimeTranslator:
             return ''
         if self.norm:
             sentence = self.tokenize(sentence)
-            logging.info('Normalized input: {}'.format(sentence))
+            logger.info('Normalized input: {}'.format(sentence))
         grammar_file = self.grammar(sentence, ctx_name)
         decoder = self.decoders[ctx_name]
         start_time = time.time()
         hyp = decoder.decoder.decode(sentence, grammar_file)
         stop_time = time.time()
-        logging.info('Translation time: {} seconds'.format(stop_time - start_time))
+        logger.info('Translation time: {} seconds'.format(stop_time - start_time))
         # Empty reference: HPYPLM does not learn prior to next translation
         decoder.ref_fifo.write('\n')
         decoder.ref_fifo.flush()
         if self.norm:
-            logging.info('Normalized translation: {}'.format(hyp))
+            logger.info('Normalized translation: {}'.format(hyp))
             hyp = self.detokenize(hyp)
         lock.release()
         return hyp
@@ -271,9 +273,9 @@ class RealtimeTranslator:
                 cmd_name = cmd_name[0]
             (command, nargs) = self.COMMANDS.get(cmd_name, (None, None))
             if command and len(args[1:]) in nargs:
-                logging.info('{} ({}) ||| {}'.format(cmd_name, ctx_name, ' ||| '.join(args[1:])))
+                logger.info('{} ({}) ||| {}'.format(cmd_name, ctx_name, ' ||| '.join(args[1:])))
                 return command(*args[1:], ctx_name=ctx_name)
-        logging.info('ERROR: command: {}'.format(' ||| '.join(args)))
+        logger.info('ERROR: command: {}'.format(' ||| '.join(args)))
         
     def learn(self, source, target, ctx_name=None):
         '''Learn from training instance (inc extracting grammar if needed)
@@ -282,7 +284,7 @@ class RealtimeTranslator:
         lock.acquire()
         self.lazy_ctx(ctx_name)
         if '' in (source.strip(), target.strip()):
-            logging.info('ERROR: empty source or target: {} ||| {}'.format(source, target))
+            logger.info('ERROR: empty source or target: {} ||| {}'.format(source, target))
             lock.release()
             return
         if self.norm:
@@ -294,15 +296,15 @@ class RealtimeTranslator:
         # MIRA update before adding data to grammar extractor
         decoder = self.decoders[ctx_name]
         mira_log = decoder.decoder.update(source, grammar_file, target)
-        logging.info('MIRA: {}'.format(mira_log))
+        logger.info('MIRA: {}'.format(mira_log))
         # Add to HPYPLM by writing to fifo (read on next translation)
-        logging.info('Adding to HPYPLM: {}'.format(target))
+        logger.info('Adding to HPYPLM: {}'.format(target))
         decoder.ref_fifo.write('{}\n'.format(target))
         decoder.ref_fifo.flush()
         # Store incremental data for save/load
         self.ctx_data[ctx_name].append((source, target, alignment))
         # Add aligned sentence pair to grammar extractor
-        logging.info('Adding to bitext: {} ||| {} ||| {}'.format(source, target, alignment))
+        logger.info('Adding to bitext: {} ||| {} ||| {}'.format(source, target, alignment))
         self.extractor.add_instance(source, target, alignment, ctx_name)
         # Clear (old) cached grammar
         rm_grammar = self.grammar_dict[ctx_name].pop(source)
@@ -315,7 +317,7 @@ class RealtimeTranslator:
         self.lazy_ctx(ctx_name)
         ctx_data = self.ctx_data[ctx_name]
         out = open(filename, 'w') if filename else sys.stdout
-        logging.info('Saving state for context ({}) with {} sentences'.format(ctx_name, len(ctx_data)))
+        logger.info('Saving state for context ({}) with {} sentences'.format(ctx_name, len(ctx_data)))
         out.write('{}\n'.format(self.decoders[ctx_name].decoder.get_weights()))
         for (source, target, alignment) in ctx_data:
             out.write('{} ||| {} ||| {}\n'.format(source, target, alignment))
@@ -333,8 +335,8 @@ class RealtimeTranslator:
         input = open(filename) if filename else sys.stdin
         # Non-initial load error
         if ctx_data:
-            logging.info('ERROR: Incremental data has already been added to context ({})'.format(ctx_name))
-            logging.info('       State can only be loaded to a new context.')
+            logger.info('ERROR: Incremental data has already been added to context ({})'.format(ctx_name))
+            logger.info('       State can only be loaded to a new context.')
             lock.release()
             return
         # Many things can go wrong if bad state data is given
@@ -343,7 +345,7 @@ class RealtimeTranslator:
             line = input.readline().strip()
             # Throws exception if bad line
             decoder.decoder.set_weights(line)
-            logging.info('Loading state...')
+            logger.info('Loading state...')
             start_time = time.time()
             # Lines source ||| target ||| alignment
             while True:
@@ -362,12 +364,12 @@ class RealtimeTranslator:
                 decoder.ref_fifo.write('{}\n'.format(target))
                 decoder.ref_fifo.flush()
             stop_time = time.time()
-            logging.info('Loaded state for context ({}) with {} sentences in {} seconds'.format(ctx_name, len(ctx_data), stop_time - start_time))
+            logger.info('Loaded state for context ({}) with {} sentences in {} seconds'.format(ctx_name, len(ctx_data), stop_time - start_time))
             lock.release()
         # Recover from bad load attempt by restarting context.
         # Guaranteed not to cause data loss since only a new context can load state.
         except:
-            logging.info('ERROR: could not load state, restarting context ({})'.format(ctx_name))
+            logger.info('ERROR: could not load state, restarting context ({})'.format(ctx_name))
             # ctx_name is already owned and needs to be restarted before other blocking threads use
             self.drop_ctx(ctx_name, force=True)
             self.lazy_ctx(ctx_name)
author	Michael Denkowski <mdenkows@cs.cmu.edu>	2013-10-01 14:08:05 -0700
committer	Michael Denkowski <mdenkows@cs.cmu.edu>	2013-10-01 14:08:05 -0700
commit	51a83643d95ab0d7add9dd66b0b38044db10a797 (patch)
tree	7413fc7a105c1a1f9a3d0a2fb2f25d34640d8c8c /realtime
parent	4830491825f0c8740d505c192d60b388f64acf1b (diff)