#!/usr/bin/env python
"""Force-align sentence pairs using previously trained fast_align models.

Reads ``source ||| target`` lines from stdin, runs each one through a forward
and a reverse ``fast_align`` process, symmetrizes the two link sets with
``atools -c grow-diag-final-and`` and writes one alignment line per input
line to stdout.

Usage:
    force_align.py fwd_params fwd_err rev_params rev_err <in.f-e >out.f-e.gdfa
"""

# Provenance: reconstructed from a three-commit patch series in which this
# script was added and then adjusted to split fast_align output on '|||'
# and strip the field.  The same series also
#   * added ``EXTRA_DIST = mira.py`` to training/mira/Makefile.am, and
#   * changed utils/atools.cc so that ``-i - -j -`` no longer exits with an
#     error but reads PAIRS of lines from stdin -- the mode relied on below.

import os
import subprocess
import sys


def main():
    """Run the force-alignment pipeline over stdin.

    Expects exactly four command-line arguments: forward parameter file,
    forward training stderr log, reverse parameter file, reverse training
    stderr log.  Exits with status 2 on a wrong argument count and with
    status 1 when a log does not contain the required model settings.
    """
    args = sys.argv[1:]
    if len(args) != 4:
        sys.stderr.write('run:\n')
        sys.stderr.write('    fast_align -i corpus.f-e -d -v -o -p fwd_params >fwd_align 2>fwd_err\n')
        sys.stderr.write('    fast_align -i corpus.f-e -r -d -v -o -p rev_params >rev_align 2>rev_err\n')
        sys.stderr.write('\n')
        sys.stderr.write('then run:\n')
        # Input comes from stdin and output goes to stdout; the script takes
        # exactly four positional arguments.
        sys.stderr.write('    {} fwd_params fwd_err rev_params rev_err <in.f-e >out.f-e.gdfa\n'.format(sys.argv[0]))
        sys.exit(2)

    (f_p, f_err, r_p, r_err) = args

    (f_T, f_m) = find_Tm(f_err)
    (r_T, r_m) = find_Tm(r_err)
    for (log, tension, ratio) in ((f_err, f_T, f_m), (r_err, r_T, r_m)):
        if not (tension and ratio):
            # Passing empty -T/-m values to fast_align would only fail later
            # with a less helpful message.
            sys.stderr.write('could not find tension and length ratio in {}\n'.format(log))
            sys.exit(1)

    # Resolve against the absolute script location: dirname(__file__) is ''
    # when the script is started as "python force_align.py", which would
    # make both tool paths relative to the current directory / PATH.
    here = os.path.dirname(os.path.abspath(__file__))
    fast_align = os.path.join(here, 'fast_align')
    f_cmd = [fast_align, '-i', '-', '-d', '-T', f_T, '-m', f_m, '-f', f_p]
    r_cmd = [fast_align, '-i', '-', '-d', '-T', r_T, '-m', r_m, '-f', r_p, '-r']

    atools = os.path.join(os.path.dirname(here), 'utils', 'atools')
    tools_cmd = [atools, '-i', '-', '-j', '-', '-c', 'grow-diag-final-and']

    sys.stderr.write('running: {}\n'.format(' '.join(f_cmd)))
    sys.stderr.write('running: {}\n'.format(' '.join(r_cmd)))
    sys.stderr.write('running: {}\n'.format(' '.join(tools_cmd)))

    procs = [_spawn(cmd) for cmd in (f_cmd, r_cmd, tools_cmd)]
    (f_a, r_a, tools) = procs
    try:
        for line in sys.stdin:
            if not line.endswith('\n'):
                # A final line without a newline would leave the aligners
                # waiting for the rest of the line forever.
                line += '\n'
            # f words ||| e words ||| links ||| score
            f_line = _links_field(_ask(f_a, line))
            r_line = _links_field(_ask(r_a, line))
            # atools reads the forward and reverse links as a pair of lines.
            sys.stdout.write(_ask(tools, '{}\n{}\n'.format(f_line, r_line)))
            sys.stdout.flush()
    finally:
        # Closing stdin signals EOF so the children can exit; then reap them.
        for proc in procs:
            proc.stdin.close()
        for proc in procs:
            proc.wait()


def _spawn(cmd):
    """Start *cmd* with text-mode pipes on stdin and stdout."""
    return subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            universal_newlines=True)


def _ask(proc, text):
    """Send *text* to *proc* and return the next line it prints."""
    proc.stdin.write(text)
    # Without a flush the request can sit in our buffer while we block on
    # readline(), deadlocking both sides.
    proc.stdin.flush()
    return proc.stdout.readline()


def _links_field(output_line):
    """Return the third '|||'-separated field of a fast_align output line."""
    fields = output_line.split('|||')
    if len(fields) < 3:
        raise ValueError('unexpected fast_align output: {!r}'.format(output_line))
    return fields[2].strip()


def find_Tm(err):
    """Extract tension and length ratio from a fast_align stderr log.

    Scans the file at path *err* and returns ``(T, m)`` as strings, each the
    last whitespace-separated token of the most recent line containing
    'final tension' / 'expected target length' respectively.  A value that
    never appears is returned as ''.
    """
    (T, m) = ('', '')
    with open(err) as log:
        for line in log:
            # expected target length = source length * N
            if 'expected target length' in line:
                m = line.split()[-1]
            elif 'final tension' in line:
                T = line.split()[-1]
    return (T, m)


if __name__ == '__main__':
    main()