"""Realtime translation (mostly a cdec wrapper for now).

Consolidated, properly formatted form of the four modules that commit
3a6fa32ca16d0fbdc76e738449bf1b27d866acc6 (Michael Denkowski,
Mon, 19 Aug 2013 08:23:42 -0700) created under ``realtime/rt``:

* ``__init__.py`` (2 lines)  -- star-imported ``aligner`` and ``decoder``
* ``aligner.py``  (50 lines) -- ``ForceAligner``
* ``decoder.py``  (23 lines) -- ``Decoder`` and ``CdecDecoder``
* ``util.py``     (13 lines) -- ``popen_io`` and ``consume_stream``

NOTE(review): the binary paths below are resolved relative to this file and
assume it still lives two directories below the cdec root (as
``realtime/rt/*.py`` did) -- confirm if the module is moved.
"""

import os
import subprocess
import sys
import threading


# --------------------------------------------------------------------------
# realtime/rt/util.py
# --------------------------------------------------------------------------

def popen_io(cmd):
    """Start *cmd* with piped stdin/stdout and a background stderr drain.

    Args:
        cmd: Argument list handed to ``subprocess.Popen`` (no shell).

    Returns:
        The ``subprocess.Popen`` object.  Its ``stdin``/``stdout`` are text
        streams, so callers can write and read ``str`` lines on both
        Python 2.7 and Python 3.
    """
    p = subprocess.Popen(
        cmd,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Text-mode pipes: without this, Python 3 pipes only accept bytes.
        universal_newlines=True,
    )
    # stderr must be read continuously or a chatty child eventually blocks
    # on a full pipe.
    consume_stream(p.stderr)
    return p


def consume_stream(stream):
    """Read and discard everything from *stream* on a background thread.

    Args:
        stream: Any iterable of lines (typically a child's stderr pipe).
    """
    def consume(s):
        for _ in s:
            pass

    drain = threading.Thread(target=consume, args=(stream,))
    # The thread only throws data away, so it must never keep the
    # interpreter alive if a child process fails to exit.
    drain.daemon = True
    drain.start()


def _send_line(pipe, text):
    """Write *text* plus a newline to *pipe* and flush it immediately."""
    pipe.write('{}\n'.format(text))
    # The caller blocks on the child's reply right after writing; an
    # unflushed buffer would leave both sides waiting forever.
    pipe.flush()


def _cdec_root():
    """Return the directory three levels above this file (the cdec root)."""
    # abspath first: a relative __file__ would otherwise collapse to ''.
    here = os.path.abspath(__file__)
    return os.path.dirname(os.path.dirname(os.path.dirname(here)))


# --------------------------------------------------------------------------
# realtime/rt/aligner.py
# --------------------------------------------------------------------------

class ForceAligner(object):
    """Aligns sentence pairs using two fast_align processes and atools.

    One fast_align process runs in the forward direction and one in the
    reverse direction (``-r``); their link sets are merged by atools using
    ``grow-diag-final-and``.
    """

    def __init__(self, fwd_params, fwd_err, rev_params, rev_err):
        """Launch the forward aligner, reverse aligner and atools.

        Args:
            fwd_params: File passed to the forward fast_align via ``-f``.
            fwd_err: Log file parsed by ``read_err`` for the forward
                ``-T`` and ``-m`` values.
            rev_params: File passed to the reverse fast_align via ``-f``.
            rev_err: Log file parsed by ``read_err`` for the reverse
                ``-T`` and ``-m`` values.
        """
        cdec_root = _cdec_root()
        fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align')
        atools = os.path.join(cdec_root, 'utils', 'atools')

        (fwd_T, fwd_m) = self.read_err(fwd_err)
        (rev_T, rev_m) = self.read_err(rev_err)

        fwd_cmd = [fast_align, '-i', '-', '-d', '-T', fwd_T, '-m', fwd_m,
                   '-f', fwd_params]
        rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m,
                   '-f', rev_params, '-r']
        tools_cmd = [atools, '-i', '-', '-j', '-', '-c', 'grow-diag-final-and']

        self.fwd_align = popen_io(fwd_cmd)
        self.rev_align = popen_io(rev_cmd)
        self.tools = popen_io(tools_cmd)

    def align(self, line):
        """Align one sentence pair and return the symmetrized links.

        Args:
            line: The input line, forwarded unchanged to both aligners.

        Returns:
            The stripped line atools prints after receiving the forward and
            reverse link sets.

        Raises:
            IndexError: If an aligner's reply has fewer than three
                ``|||``-separated fields (e.g. the process has died and
                ``readline`` returned an empty string).
        """
        _send_line(self.fwd_align.stdin, line)
        _send_line(self.rev_align.stdin, line)
        # Aligner output: f words ||| e words ||| links ||| score
        fwd_line = self.fwd_align.stdout.readline().split('|||')[2].strip()
        rev_line = self.rev_align.stdout.readline().split('|||')[2].strip()
        _send_line(self.tools.stdin, fwd_line)
        _send_line(self.tools.stdin, rev_line)
        return self.tools.stdout.readline().strip()

    def close(self):
        """Close stdin of all three child processes."""
        self.fwd_align.stdin.close()
        self.rev_align.stdin.close()
        self.tools.stdin.close()

    def read_err(self, err):
        """Extract the tension and length-ratio values from a log file.

        Args:
            err: Path of the log file to scan.

        Returns:
            A ``(T, m)`` tuple of strings: ``T`` is the last token of the
            last line containing ``final tension`` and ``m`` the last token
            of the last line containing ``expected target length``.  Either
            is ``''`` when no such line exists.
        """
        (T, m) = ('', '')
        # Context manager so the handle is closed deterministically.
        with open(err) as log:
            for line in log:
                # expected target length = source length * N
                if 'expected target length' in line:
                    m = line.split()[-1]
                # final tension: N
                elif 'final tension' in line:
                    T = line.split()[-1]
        return (T, m)


# --------------------------------------------------------------------------
# realtime/rt/decoder.py
# --------------------------------------------------------------------------

class Decoder(object):
    """Base class for decoders backed by a child process in ``self.decoder``."""

    def close(self):
        """Close the decoder process's stdin."""
        self.decoder.stdin.close()


class CdecDecoder(Decoder):
    """Line-oriented wrapper around a running ``cdec`` process."""

    def __init__(self, config, weights):
        """Launch ``cdec -c config -w weights``.

        Args:
            config: Value passed to cdec's ``-c`` option.
            weights: Value passed to cdec's ``-w`` option.
        """
        cdec_root = _cdec_root()
        decoder = os.path.join(cdec_root, 'decoder', 'cdec')
        decoder_cmd = [decoder, '-c', config, '-w', weights]
        self.decoder = popen_io(decoder_cmd)

    def decode(self, sentence, grammar):
        """Send one sentence to cdec and return its one-line reply.

        Args:
            sentence: Source sentence text.
            grammar: Per-sentence grammar for this input.  When falsy the
                bare sentence is sent, exactly as before.

        Returns:
            The next line of decoder output, stripped.
        """
        if grammar:
            # Previously `grammar` was accepted but never transmitted.
            # NOTE(review): <seg grammar="..."> is cdec's documented
            # per-sentence grammar markup -- confirm against the cdec build
            # in use.
            request = '<seg grammar="{g}">{s}</seg>'.format(g=grammar,
                                                            s=sentence)
        else:
            request = sentence
        _send_line(self.decoder.stdin, request)
        return self.decoder.stdout.readline().strip()