summaryrefslogtreecommitdiff
path: root/realtime/rt/aligner.py
diff options
context:
space:
mode:
author: Michael Denkowski <mdenkows@cs.cmu.edu> 2013-08-19 08:23:42 -0700
committer: Michael Denkowski <mdenkows@cs.cmu.edu> 2013-08-19 08:23:42 -0700
commit: ac469cdf4c70154a1c2cedce9edf5cdc3bdb2d61 (patch)
tree: 3e8bcbc6b00533e7a79e3cf28c2ac6aa4bdadd8d /realtime/rt/aligner.py
parent: f4a3a2547316ca5d31366e6808858fe94981415c (diff)
Realtime translation (mostly a cdec wrapper for now)
Diffstat (limited to 'realtime/rt/aligner.py')
-rw-r--r-- realtime/rt/aligner.py 50
1 files changed, 50 insertions, 0 deletions
diff --git a/realtime/rt/aligner.py b/realtime/rt/aligner.py
new file mode 100644
index 00000000..d94dbda0
--- /dev/null
+++ b/realtime/rt/aligner.py
@@ -0,0 +1,50 @@
+import os
+import sys
+import subprocess
+
+import util
+
+class ForceAligner:
+    """Drive two fast_align processes and one atools process, one line at a time.
+
+    The constructor starts the three child processes through util.popen_io();
+    align() then pushes a single input line through both aligners and merges
+    the two link sets with atools. Call close() when finished.
+    """
+
+    def __init__(self, fwd_params, fwd_err, rev_params, rev_err):
+        """Locate the binaries, read T and m from the err files, start the processes.
+
+        fwd_params, rev_params: values handed to fast_align's -f option
+            (presumably paths to parameter files -- confirm against caller).
+        fwd_err, rev_err: paths of text files scanned by read_err() for the
+            values handed to fast_align's -T and -m options.
+        """
+        # Three dirname() steps up from this file gives the cdec root directory.
+        cdec_root = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+        fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align')
+        atools = os.path.join(cdec_root, 'utils', 'atools')
+
+        (fwd_T, fwd_m) = self.read_err(fwd_err)
+        (rev_T, rev_m) = self.read_err(rev_err)
+
+        # The reverse command is the forward command with its own T/m/params plus '-r'.
+        fwd_cmd = [fast_align, '-i', '-', '-d', '-T', fwd_T, '-m', fwd_m, '-f', fwd_params]
+        rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m, '-f', rev_params, '-r']
+        tools_cmd = [atools, '-i', '-', '-j', '-', '-c', 'grow-diag-final-and']
+
+        self.fwd_align = util.popen_io(fwd_cmd)
+        self.rev_align = util.popen_io(rev_cmd)
+        self.tools = util.popen_io(tools_cmd)
+
+    def align(self, line):
+        """Align one input line and return the merged alignment as a stripped string.
+
+        The line is written to both aligners; the third '|||'-separated field of
+        each reply is forwarded to atools, whose next output line is returned.
+
+        Raises IndexError if an aligner reply has fewer than three '|||' fields
+        (this includes an empty reply at end of stream).
+        """
+        request = '{}\n'.format(line)
+        self.fwd_align.stdin.write(request)
+        # Flush before the blocking readline() below so the request cannot sit
+        # in a local buffer (whether util.popen_io buffers is not visible here).
+        self.fwd_align.stdin.flush()
+        self.rev_align.stdin.write(request)
+        self.rev_align.stdin.flush()
+        # f words ||| e words ||| links ||| score
+        fwd_line = self.fwd_align.stdout.readline().split('|||')[2].strip()
+        rev_line = self.rev_align.stdout.readline().split('|||')[2].strip()
+        self.tools.stdin.write('{}\n'.format(fwd_line))
+        self.tools.stdin.write('{}\n'.format(rev_line))
+        self.tools.stdin.flush()
+        return self.tools.stdout.readline().strip()
+
+    def close(self):
+        """Close the stdin pipe of each of the three child processes."""
+        self.fwd_align.stdin.close()
+        self.rev_align.stdin.close()
+        self.tools.stdin.close()
+
+    def read_err(self, err):
+        """Return (T, m) as strings parsed from the text file at path err.
+
+        Each value is the last whitespace-separated token of the matching line;
+        when several lines match, the last one wins. A value whose line never
+        appears is returned as ''.
+        """
+        (T, m) = ('', '')
+        # 'with' closes the file even if parsing raises; the original leaked the handle.
+        with open(err) as err_file:
+            for line in err_file:
+                # expected target length = source length * N
+                if 'expected target length' in line:
+                    m = line.split()[-1]
+                # final tension: N
+                elif 'final tension' in line:
+                    T = line.split()[-1]
+        return (T, m)