diff options
author | mjdenkowski <michael.j.denkowski@gmail.com> | 2014-08-07 16:26:15 -0400 |
---|---|---|
committer | mjdenkowski <michael.j.denkowski@gmail.com> | 2014-08-07 16:26:15 -0400 |
commit | 19e1c5a5fbe178b91b5e2995584b5e72a7a5940f (patch) | |
tree | 67363daf428306d27fd1a07df68e8fc8d10288a7 | |
parent | a15d39c6ca0a39c7c549d24f0e8c72731821c8c0 (diff) |
Don't depend on realtime in case people don't want to build pycdec.
-rwxr-xr-x | word-aligner/force_align.py | 69 |
1 file changed, 64 insertions, 5 deletions
diff --git a/word-aligner/force_align.py b/word-aligner/force_align.py index 8386e6a5..5cef9026 100755 --- a/word-aligner/force_align.py +++ b/word-aligner/force_align.py @@ -1,11 +1,68 @@ #!/usr/bin/env python import os +import subprocess import sys +import threading -# Hook into realtime -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'realtime', 'rt')) -from aligner import ForceAligner +# Simplified, non-threadsafe version for force_align.py +# Use the version in realtime for development +class Aligner: + + def __init__(self, fwd_params, fwd_err, rev_params, rev_err, heuristic='grow-diag-final-and'): + + cdec_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align') + atools = os.path.join(cdec_root, 'utils', 'atools') + + (fwd_T, fwd_m) = self.read_err(fwd_err) + (rev_T, rev_m) = self.read_err(rev_err) + + fwd_cmd = [fast_align, '-i', '-', '-d', '-T', fwd_T, '-m', fwd_m, '-f', fwd_params] + rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m, '-f', rev_params, '-r'] + tools_cmd = [atools, '-i', '-', '-j', '-', '-c', heuristic] + + self.fwd_align = popen_io(fwd_cmd) + self.rev_align = popen_io(rev_cmd) + self.tools = popen_io(tools_cmd) + + def align(self, line): + self.fwd_align.stdin.write('{}\n'.format(line)) + self.rev_align.stdin.write('{}\n'.format(line)) + # f words ||| e words ||| links ||| score + fwd_line = self.fwd_align.stdout.readline().split('|||')[2].strip() + rev_line = self.rev_align.stdout.readline().split('|||')[2].strip() + self.tools.stdin.write('{}\n'.format(fwd_line)) + self.tools.stdin.write('{}\n'.format(rev_line)) + al_line = self.tools.stdout.readline().strip() + return al_line + + def close(self): + self.fwd_align.stdin.close() + self.fwd_align.wait() + self.rev_align.stdin.close() + self.rev_align.wait() + self.tools.stdin.close() + self.tools.wait() + + def read_err(self, err): + (T, m) = ('', '') + 
for line in open(err): + # expected target length = source length * N + if 'expected target length' in line: + m = line.split()[-1] + # final tension: N + elif 'final tension' in line: + T = line.split()[-1] + return (T, m) + +def popen_io(cmd): + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + def consume(s): + for _ in s: + pass + threading.Thread(target=consume, args=(p.stderr,)).start() + return p def main(): @@ -20,16 +77,18 @@ def main(): sys.stderr.write('where heuristic is one of: (intersect union grow-diag grow-diag-final grow-diag-final-and) default=grow-diag-final-and\n') sys.exit(2) - aligner = ForceAligner(*sys.argv[1:]) + aligner = Aligner(*sys.argv[1:]) while True: line = sys.stdin.readline() if not line: break - sys.stdout.write('{}\n'.format(aligner.align_formatted(line.strip()))) + sys.stdout.write('{}\n'.format(aligner.align(line.strip()))) sys.stdout.flush() aligner.close() if __name__ == '__main__': main() + + |