summaryrefslogtreecommitdiff
path: root/word-aligner/force_align.py
diff options
context:
space:
mode:
authormjdenkowski <michael.j.denkowski@gmail.com>2014-08-07 16:26:15 -0400
committermjdenkowski <michael.j.denkowski@gmail.com>2014-08-07 16:26:15 -0400
commitf616df9a37edc3bbcecedc395ba7233c8fab3770 (patch)
tree42960cca615b02ca8078923c40eb28d4d50892e3 /word-aligner/force_align.py
parent816858191b566556f0e72651fad3f243ffa07c4d (diff)
Don't depend on realtime in case people don't want to build pycdec.
Diffstat (limited to 'word-aligner/force_align.py')
-rwxr-xr-xword-aligner/force_align.py69
1 files changed, 64 insertions, 5 deletions
diff --git a/word-aligner/force_align.py b/word-aligner/force_align.py
index 8386e6a5..5cef9026 100755
--- a/word-aligner/force_align.py
+++ b/word-aligner/force_align.py
@@ -1,11 +1,68 @@
#!/usr/bin/env python
import os
+import subprocess
import sys
+import threading
-# Hook into realtime
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'realtime', 'rt'))
-from aligner import ForceAligner
+# Simplified, non-threadsafe version for force_align.py
+# Use the version in realtime for development
+class Aligner:
+    """Drive two fast_align processes and one atools process over pipes.
+
+    One child aligns with the forward parameters, one with the reverse
+    parameters (the latter is started with the extra '-r' flag), and a
+    third (atools) combines their two outputs using `heuristic`.
+    All three are started once in the constructor and fed line by line.
+    """
+
+    def __init__(self, fwd_params, fwd_err, rev_params, rev_err, heuristic='grow-diag-final-and'):
+        """Start the three child processes.
+
+        fwd_params / rev_params: passed to fast_align via '-f'.
+        fwd_err / rev_err: paths to text files parsed by read_err() for
+            the values passed via '-T' and '-m'.
+        heuristic: passed to atools via '-c'.
+        """
+        # Binaries are located relative to the directory above this
+        # script's directory.
+        cdec_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align')
+        atools = os.path.join(cdec_root, 'utils', 'atools')
+
+        (fwd_T, fwd_m) = self.read_err(fwd_err)
+        (rev_T, rev_m) = self.read_err(rev_err)
+
+        fwd_cmd = [fast_align, '-i', '-', '-d', '-T', fwd_T, '-m', fwd_m, '-f', fwd_params]
+        rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m, '-f', rev_params, '-r']
+        tools_cmd = [atools, '-i', '-', '-j', '-', '-c', heuristic]
+
+        self.fwd_align = popen_io(fwd_cmd)
+        self.rev_align = popen_io(rev_cmd)
+        self.tools = popen_io(tools_cmd)
+
+    def align(self, line):
+        """Return the combined alignment for one input line.
+
+        `line` is written (plus a newline) to both fast_align children;
+        one line is read back from each, and the third '|||'-separated
+        field of each is forwarded to atools. The stripped line that
+        atools answers with is returned.
+
+        Raises IndexError if a fast_align reply has fewer than three
+        '|||'-separated fields.
+        """
+        request = '{}\n'.format(line)
+        for aligner in (self.fwd_align, self.rev_align):
+            aligner.stdin.write(request)
+            # Flush so the child sees the line before we block on its
+            # reply, even if the pipe object happens to be buffered.
+            aligner.stdin.flush()
+        # f words ||| e words ||| links ||| score
+        fwd_line = self.fwd_align.stdout.readline().split('|||')[2].strip()
+        rev_line = self.rev_align.stdout.readline().split('|||')[2].strip()
+        self.tools.stdin.write('{}\n'.format(fwd_line))
+        self.tools.stdin.write('{}\n'.format(rev_line))
+        self.tools.stdin.flush()
+        al_line = self.tools.stdout.readline().strip()
+        return al_line
+
+    def close(self):
+        """Close each child's stdin and wait for that child to exit."""
+        for child in (self.fwd_align, self.rev_align, self.tools):
+            child.stdin.close()
+            child.wait()
+
+    def read_err(self, err):
+        """Extract (T, m) from the text file at path `err`.
+
+        T is the last whitespace-separated token of a line containing
+        'final tension'; m is the last token of a line containing
+        'expected target length'. If several lines match, the last one
+        wins; a value that never appears is returned as ''.
+        """
+        (T, m) = ('', '')
+        # The context manager closes the file; the bare open() used
+        # previously left the handle open.
+        with open(err) as handle:
+            for line in handle:
+                # expected target length = source length * N
+                if 'expected target length' in line:
+                    m = line.split()[-1]
+                # final tension: N
+                elif 'final tension' in line:
+                    T = line.split()[-1]
+        return (T, m)
+
+def popen_io(cmd):
+    """Start `cmd` with piped stdin/stdout/stderr and return the Popen object.
+
+    A background thread reads and discards everything the child writes
+    to stderr, so the caller only has to deal with stdin and stdout.
+    """
+    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    def consume(s):
+        # Read to EOF and throw the data away.
+        for _ in s:
+            pass
+    drain = threading.Thread(target=consume, args=(p.stderr,))
+    # Daemon thread: if the caller never closes the child's stdin, this
+    # thread would otherwise block on the pipe and keep the interpreter
+    # from exiting.
+    drain.daemon = True
+    drain.start()
+    return p
def main():
@@ -20,16 +77,18 @@ def main():
sys.stderr.write('where heuristic is one of: (intersect union grow-diag grow-diag-final grow-diag-final-and) default=grow-diag-final-and\n')
sys.exit(2)
- aligner = ForceAligner(*sys.argv[1:])
+ aligner = Aligner(*sys.argv[1:])
while True:
line = sys.stdin.readline()
if not line:
break
- sys.stdout.write('{}\n'.format(aligner.align_formatted(line.strip())))
+ sys.stdout.write('{}\n'.format(aligner.align(line.strip())))
sys.stdout.flush()
aligner.close()
if __name__ == '__main__':
main()
+
+