diff options
Diffstat (limited to 'word-aligner')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | word-aligner/Makefile.am | 1 |
| -rwxr-xr-x | word-aligner/force_align.py | 69 |
2 files changed, 65 insertions, 5 deletions
| diff --git a/word-aligner/Makefile.am b/word-aligner/Makefile.am index 075ad009..071e4977 100644 --- a/word-aligner/Makefile.am +++ b/word-aligner/Makefile.am @@ -2,6 +2,7 @@ bin_PROGRAMS = fast_align binderiv  fast_align_SOURCES = fast_align.cc ttables.cc da.h ttables.h  fast_align_LDADD = ../utils/libutils.a +fast_align_LDFLAGS = $(STATIC_FLAGS)  binderiv_SOURCES = binderiv.cc  binderiv_LDADD = ../utils/libutils.a diff --git a/word-aligner/force_align.py b/word-aligner/force_align.py index 8386e6a5..5cef9026 100755 --- a/word-aligner/force_align.py +++ b/word-aligner/force_align.py @@ -1,11 +1,68 @@  #!/usr/bin/env python  import os +import subprocess  import sys +import threading -# Hook into realtime -sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'realtime', 'rt')) -from aligner import ForceAligner +# Simplified, non-threadsafe version for force_align.py +# Use the version in realtime for development +class Aligner: + +    def __init__(self, fwd_params, fwd_err, rev_params, rev_err, heuristic='grow-diag-final-and'): + +        cdec_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +        fast_align = os.path.join(cdec_root, 'word-aligner', 'fast_align') +        atools = os.path.join(cdec_root, 'utils', 'atools') + +        (fwd_T, fwd_m) = self.read_err(fwd_err) +        (rev_T, rev_m) = self.read_err(rev_err) + +        fwd_cmd = [fast_align, '-i', '-', '-d', '-T', fwd_T, '-m', fwd_m, '-f', fwd_params] +        rev_cmd = [fast_align, '-i', '-', '-d', '-T', rev_T, '-m', rev_m, '-f', rev_params, '-r'] +        tools_cmd = [atools, '-i', '-', '-j', '-', '-c', heuristic] + +        self.fwd_align = popen_io(fwd_cmd) +        self.rev_align = popen_io(rev_cmd) +        self.tools = popen_io(tools_cmd) + +    def align(self, line): +        self.fwd_align.stdin.write('{}\n'.format(line)) +        self.rev_align.stdin.write('{}\n'.format(line)) +        # f words ||| e words ||| links ||| score +       
 fwd_line = self.fwd_align.stdout.readline().split('|||')[2].strip() +        rev_line = self.rev_align.stdout.readline().split('|||')[2].strip() +        self.tools.stdin.write('{}\n'.format(fwd_line)) +        self.tools.stdin.write('{}\n'.format(rev_line)) +        al_line = self.tools.stdout.readline().strip() +        return al_line +  +    def close(self): +        self.fwd_align.stdin.close() +        self.fwd_align.wait() +        self.rev_align.stdin.close() +        self.rev_align.wait() +        self.tools.stdin.close() +        self.tools.wait() + +    def read_err(self, err): +        (T, m) = ('', '') +        for line in open(err): +            # expected target length = source length * N +            if 'expected target length' in line: +                m = line.split()[-1] +            # final tension: N +            elif 'final tension' in line: +                T = line.split()[-1] +        return (T, m) + +def popen_io(cmd): +    p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +    def consume(s): +        for _ in s: +            pass +    threading.Thread(target=consume, args=(p.stderr,)).start() +    return p  def main(): @@ -20,16 +77,18 @@ def main():          sys.stderr.write('where heuristic is one of: (intersect union grow-diag grow-diag-final grow-diag-final-and) default=grow-diag-final-and\n')          sys.exit(2) -    aligner = ForceAligner(*sys.argv[1:]) +    aligner = Aligner(*sys.argv[1:])      while True:          line = sys.stdin.readline()          if not line:              break -        sys.stdout.write('{}\n'.format(aligner.align_formatted(line.strip()))) +        sys.stdout.write('{}\n'.format(aligner.align(line.strip())))          sys.stdout.flush()      aligner.close()  if __name__ == '__main__':      main() + + | 
