diff options
author | Michael Denkowski <mdenkows@cs.cmu.edu> | 2013-08-19 08:23:42 -0700 |
---|---|---|
committer | Michael Denkowski <mdenkows@cs.cmu.edu> | 2013-08-19 08:23:42 -0700 |
commit | 3a6fa32ca16d0fbdc76e738449bf1b27d866acc6 (patch) | |
tree | 677fbf6219bfb6236dafc99df4df83fd1d73027b /word-aligner | |
parent | 3c495d2b8664f292aedda8235eb8bf375830700c (diff) |
Realtime translation (mostly a cdec wrapper for now)
Diffstat (limited to 'word-aligner')
-rwxr-xr-x | word-aligner/force_align.py | 46 |
1 files changed, 8 insertions, 38 deletions
diff --git a/word-aligner/force_align.py b/word-aligner/force_align.py index a0c1aad7..ad6d95fa 100755 --- a/word-aligner/force_align.py +++ b/word-aligner/force_align.py @@ -2,7 +2,6 @@ import os import sys -import subprocess def main(): @@ -15,45 +14,16 @@ def main(): sys.stderr.write(' {} fwd_params fwd_err rev_params rev_err <in.f-e >out.f-e.gdfa\n'.format(sys.argv[0])) sys.exit(2) - (f_p, f_err, r_p, r_err) = sys.argv[1:] - - (f_T, f_m) = find_Tm(f_err) - (r_T, r_m) = find_Tm(r_err) - - fast_align = os.path.join(os.path.dirname(__file__), 'fast_align') - f_cmd = [fast_align, '-i', '-', '-d', '-T', f_T, '-m', f_m, '-f', f_p] - r_cmd = [fast_align, '-i', '-', '-d', '-T', r_T, '-m', r_m, '-f', r_p, '-r'] - - atools = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'utils', 'atools') - tools_cmd = [atools, '-i', '-', '-j', '-', '-c', 'grow-diag-final-and'] - - sys.stderr.write('running: {}\n'.format(' '.join(f_cmd))) - sys.stderr.write('running: {}\n'.format(' '.join(r_cmd))) - sys.stderr.write('running: {}\n'.format(' '.join(tools_cmd))) - - f_a = subprocess.Popen(f_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) - r_a = subprocess.Popen(r_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) - tools = subprocess.Popen(tools_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + # Hook into realtime + sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'realtime')) + from rt import ForceAligner + aligner = ForceAligner(*sys.argv[1:]) + for line in sys.stdin: - f_a.stdin.write(line) - r_a.stdin.write(line) - # f words ||| e words ||| links ||| score - f_line = f_a.stdout.readline().split('|||')[2].strip() - r_line = r_a.stdout.readline().split('|||')[2].strip() - tools.stdin.write('{}\n'.format(f_line)) - tools.stdin.write('{}\n'.format(r_line)) - sys.stdout.write(tools.stdout.readline()) - -def find_Tm(err): - (T, m) = ('', '') - for line in open(err): - # expected target length = source length * N - if 'expected target length' in line: - m = line.split()[-1] - elif 'final tension' in line: - T = line.split()[-1] - return (T, m) + sys.stdout.write('{}\n'.format(aligner.align(line.strip()))) + aligner.close() + if __name__ == '__main__': main() |