diff options
| author | mjdenkowski <michael.j.denkowski@gmail.com> | 2014-08-07 16:26:15 -0400 | 
|---|---|---|
| committer | mjdenkowski <michael.j.denkowski@gmail.com> | 2014-08-07 16:26:15 -0400 | 
| commit | 19e1c5a5fbe178b91b5e2995584b5e72a7a5940f (patch) | |
| tree | 67363daf428306d27fd1a07df68e8fc8d10288a7 | |
| parent | a15d39c6ca0a39c7c549d24f0e8c72731821c8c0 (diff) | |
Don't depend on realtime in case people don't want to build pycdec.
| -rwxr-xr-x | word-aligner/force_align.py | 69 | 
1 files changed, 64 insertions, 5 deletions
```diff
diff --git a/word-aligner/force_align.py b/word-aligner/force_align.py
index 8386e6a5..5cef9026 100755
--- a/word-aligner/force_align.py
+++ b/word-aligner/force_align.py
@@ -1,11 +1,68 @@
 #!/usr/bin/env python
 
 import os
+import subprocess
 import sys
+import threading
 
-# Hook into realtime
-sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'realtime', 'rt'))
-from aligner import ForceAligner
+# Simplified, non-threadsafe version for force_align.py
+# Use the version in realtime for development
+class Aligner:
+
+    def __init__(self, fwd_params, fwd_err, rev_params, rev_err, heuristic='grow-diag-final-and'):
+
+        cdec_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align')
+        atools = os.path.join(cdec_root, 'utils', 'atools')
+
+        (fwd_T, fwd_m) = self.read_err(fwd_err)
+        (rev_T, rev_m) = self.read_err(rev_err)
+
+        fwd_cmd = [fast_align, '-i', '-', '-d', '-T', fwd_T, '-m', fwd_m, '-f', fwd_params]
+        rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m, '-f', rev_params, '-r']
+        tools_cmd = [atools, '-i', '-', '-j', '-', '-c', heuristic]
+
+        self.fwd_align = popen_io(fwd_cmd)
+        self.rev_align = popen_io(rev_cmd)
+        self.tools = popen_io(tools_cmd)
+
+    def align(self, line):
+        self.fwd_align.stdin.write('{}\n'.format(line))
+        self.rev_align.stdin.write('{}\n'.format(line))
+        # f words ||| e words ||| links ||| score
+        fwd_line = self.fwd_align.stdout.readline().split('|||')[2].strip()
+        rev_line = self.rev_align.stdout.readline().split('|||')[2].strip()
+        self.tools.stdin.write('{}\n'.format(fwd_line))
+        self.tools.stdin.write('{}\n'.format(rev_line))
+        al_line = self.tools.stdout.readline().strip()
+        return al_line
+ 
+    def close(self):
+        self.fwd_align.stdin.close()
+        self.fwd_align.wait()
+        self.rev_align.stdin.close()
+        self.rev_align.wait()
+        self.tools.stdin.close()
+        self.tools.wait()
+
+    def read_err(self, err):
+        (T, m) = ('', '')
+        for line in open(err):
+            # expected target length = source length * N
+            if 'expected target length' in line:
+                m = line.split()[-1]
+            # final tension: N
+            elif 'final tension' in line:
+                T = line.split()[-1]
+        return (T, m)
+
+def popen_io(cmd):
+    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    def consume(s):
+        for _ in s:
+            pass
+    threading.Thread(target=consume, args=(p.stderr,)).start()
+    return p
 
 def main():
 
@@ -20,16 +77,18 @@ def main():
         sys.stderr.write('where heuristic is one of: (intersect union grow-diag grow-diag-final grow-diag-final-and) default=grow-diag-final-and\n')
         sys.exit(2)
 
-    aligner = ForceAligner(*sys.argv[1:])
+    aligner = Aligner(*sys.argv[1:])
 
     while True:
         line = sys.stdin.readline()
         if not line:
             break
-        sys.stdout.write('{}\n'.format(aligner.align_formatted(line.strip())))
+        sys.stdout.write('{}\n'.format(aligner.align(line.strip())))
         sys.stdout.flush()
 
     aligner.close()
 
 if __name__ == '__main__':
     main()
+
+
```
