summaryrefslogtreecommitdiff
path: root/realtime/realtime.py
diff options
context:
space:
mode:
author Michael Denkowski <mdenkows@cs.cmu.edu> 2013-09-03 12:13:03 -0700
committer Michael Denkowski <mdenkows@cs.cmu.edu> 2013-09-03 12:13:03 -0700
commit eec45e082f4261871bb6547a14511d2c722e3f59 (patch)
tree dd6fe95e46d59e7f1b8cd0fd2a467dd825bee7c1 /realtime/realtime.py
parent 7e8600d924f082c2d4b84e5d80993531e880c390 (diff)
MIRA updates in realtime.py
Diffstat (limited to 'realtime/realtime.py')
-rwxr-xr-x realtime/realtime.py 47
1 files changed, 38 insertions, 9 deletions
diff --git a/realtime/realtime.py b/realtime/realtime.py
index c169ce4c..1f67bed7 100755
--- a/realtime/realtime.py
+++ b/realtime/realtime.py
@@ -1,21 +1,24 @@
#!/usr/bin/env python
+
import argparse
-import os
+import collections
import logging
-import cdec.configobj
-import cdec.sa
+import os
import shutil
import sys
import subprocess
import tempfile
import time
+import cdec.configobj
+import cdec.sa
+
from rt import ForceAligner
from rt import MIRADecoder
class RealtimeDecoder:
- def __init__(self, configdir, tmpdir='/tmp'):
+ def __init__(self, configdir, tmpdir='/tmp', cache_size=5):
# Temporary work dir
self.tmp = tempfile.mkdtemp(dir=tmpdir)
@@ -31,6 +34,9 @@ class RealtimeDecoder:
# Grammar extractor
sa_config = os.path.join(configdir, 'sa.ini')
self.extractor = cdec.sa.GrammarExtractor(sa_config, online=True)
+ self.grammar_files = collections.deque()
+ self.grammar_dict = {}
+ self.cache_size = cache_size
# Decoder
decoder_config = os.path.join(configdir, 'cdec.ini')
@@ -45,10 +51,24 @@ class RealtimeDecoder:
shutil.rmtree(self.tmp)
def grammar(self, sentence):
    """Return the path of a grammar file for ``sentence``.

    On a cache hit the previously written file is reused.  On a miss the
    rules yielded by ``self.extractor.grammar(sentence)`` are written, one
    per line, to a new temporary file under ``self.tmp``; the file is then
    registered in the cache, evicting (and deleting from disk) the oldest
    cached grammar once ``self.cache_size`` entries are held.

    Args:
        sentence: Source sentence used both as the extraction input and as
            the cache key.

    Returns:
        Path of the grammar file for ``sentence``.
    """
    # Cache hit
    grammar_file = self.grammar_dict.get(sentence)
    if grammar_file:
        logging.info('Grammar cache hit')
        return grammar_file
    # Extract and cache
    fd, grammar_file = tempfile.mkstemp(dir=self.tmp)
    # mkstemp returns an already-open OS-level descriptor; wrap it rather
    # than reopening the path so it is closed here instead of leaked.
    with os.fdopen(fd, 'w') as output:
        for rule in self.extractor.grammar(sentence):
            output.write(str(rule) + '\n')
    # Evict the oldest entry when the cache is full.  The non-empty check
    # keeps a cache_size of 0 from popping an empty deque.
    if self.grammar_files and len(self.grammar_files) >= self.cache_size:
        rm_sent = self.grammar_files.popleft()
        # If not already removed by learn method
        rm_grammar = self.grammar_dict.pop(rm_sent, None)
        if rm_grammar is not None:
            os.remove(rm_grammar)
    self.grammar_files.append(sentence)
    self.grammar_dict[sentence] = grammar_file
    return grammar_file
def decode(self, sentence):
@@ -57,15 +77,21 @@ class RealtimeDecoder:
hyp = self.decoder.decode(sentence, grammar_file)
stop_time = time.time()
logging.info('Translation time: {} seconds'.format(stop_time - start_time))
- os.remove(grammar_file)
return hyp
def learn(self, source, target):
    """Update the decoder and the grammar extractor with one sentence pair.

    The steps run in a fixed order: the decoder is updated using the
    grammar for ``source`` as it stands before the new data is added, then
    the pair is aligned and handed to the grammar extractor, and finally
    the cached grammar for ``source`` is discarded because it predates the
    newly added instance.

    Args:
        source: Source sentence.
        target: Target (reference) sentence.
    """
    # MIRA update before adding data to grammar extractor
    grammar_file = self.grammar(source)
    mira_log = self.decoder.update(source, grammar_file, target)
    logging.info('MIRA: {}'.format(mira_log))
    # Add aligned sentence pair to grammar extractor
    alignment = self.aligner.align(source, target)
    logging.info('Adding instance: {} ||| {} ||| {}'.format(source, target, alignment))
    self.extractor.add_instance(source, target, alignment)
    # Clear (old) cached grammar
    rm_grammar = self.grammar_dict.pop(source, None)
    if rm_grammar is not None:
        os.remove(rm_grammar)
    # Drop the sentence from the eviction queue as well; a stale entry
    # would occupy a cache slot and, if the sentence is cached again,
    # cause the fresh grammar to be evicted early.
    try:
        self.grammar_files.remove(source)
    except ValueError:
        # Not queued: nothing to drop.
        pass
    # TODO: Add to LM by writing to fifo
def main():
@@ -84,7 +110,10 @@ def main():
rtd = RealtimeDecoder(args.config)
try:
- for line in sys.stdin:
+ while True:
+ line = sys.stdin.readline()
+ if not line:
+ break
input = [f.strip() for f in line.split('|||')]
if len(input) == 1:
hyp = rtd.decode(input[0])