1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
import argparse
import datetime
import sys
from translate.storage.tmx import tmxfile
def _unit_date(node, attribute):
    """Return the date stored in *attribute* of the unit's target element.

    Only the first eight characters of the attribute value are used and
    they are parsed as ``YYYYMMDD``.

    Raises:
        ValueError: if the attribute lookup returns ``None`` or the value
            does not start with a valid ``YYYYMMDD`` date.
    """
    stamp = node.get_target_dom().get(attribute)
    if stamp is None:
        # Fail with a message naming the attribute rather than the opaque
        # TypeError that slicing None would produce.
        raise ValueError(
            f"translation unit has no '{attribute}' attribute on its target element")
    return datetime.datetime.strptime(stamp[:8], '%Y%m%d').date()


def extract_from_tmx(tmx_file_path,
                     src_out_path,
                     tgt_out_path,
                     begin_date,
                     date,
                     src_out_after,
                     tgt_out_after):
    """Write the source/target text of each TMX unit to parallel files.

    Every retained unit contributes exactly one line to a source file and
    one line to a target file; newlines inside a segment are replaced by
    spaces and carriage returns are dropped so the two files stay aligned.

    Args:
        tmx_file_path: path of the TMX file to read.
        src_out_path: path of the file receiving source segments.
        tgt_out_path: path of the file receiving target segments.
        begin_date: ``datetime.date`` or ``None``. When set, units whose
            ``lastusagedate`` is earlier than this date are skipped.
        date: ``datetime.date`` or ``None``. When set, units whose
            ``changedate`` is later than this date are written to the
            "after" files instead of the main output files.
        src_out_after: path of the "after" source file, or ``None``.
        tgt_out_after: path of the "after" target file, or ``None``.
            The "after" files are opened only when both paths are given.

    Raises:
        ValueError: if a required date attribute is missing or malformed,
            or if a unit falls after ``date`` but no "after" files were
            supplied.
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import ExitStack

    # One ExitStack owns every file handle, so all of them (including the
    # optional "after" files) are closed even if processing raises.
    with ExitStack() as stack:
        in_fp = stack.enter_context(open(tmx_file_path, 'rb'))
        tmx_file = tmxfile(in_fp)

        after_fps = None
        if src_out_after is not None and tgt_out_after is not None:
            after_fps = (
                stack.enter_context(open(src_out_after, "w", encoding="utf-8")),
                stack.enter_context(open(tgt_out_after, "w", encoding="utf-8")),
            )

        # UTF-8 is set explicitly so output does not depend on the locale
        # and arbitrary Unicode segment text can always be written.
        main_fps = (
            stack.enter_context(open(src_out_path, "w", encoding="utf-8")),
            stack.enter_context(open(tgt_out_path, "w", encoding="utf-8")),
        )

        for index, node in enumerate(tmx_file.unit_iter()):
            if begin_date is not None and _unit_date(node, 'lastusagedate') < begin_date:
                continue

            src_fp, tgt_fp = main_fps
            if date is not None and _unit_date(node, 'changedate') > date:
                if after_fps is None:
                    raise ValueError(
                        "a split date was given but src_out_after/tgt_out_after "
                        "were not both provided")
                src_fp, tgt_fp = after_fps

            # Flatten embedded line breaks: one unit must be one line.
            src_string = f"{node.source}".replace('\n', ' ').replace('\r', '')
            tgt_string = f"{node.target}".replace('\n', ' ').replace('\r', '')
            src_fp.write(f"{src_string}\n")
            tgt_fp.write(f"{tgt_string}\n")

            # Progress indicator; '\r' rewrites the same console line.
            if (index + 1) % 1000 == 0:
                sys.stdout.write(f"Processed {index + 1} lines\r")
                sys.stdout.flush()
def main():
    """Parse command-line options and run the TMX extraction.

    Output paths are derived from the input path: ``<input>.src`` and
    ``<input>.tgt``. When ``--date`` is given, units changed after that
    date go to ``<input>.src.after.<date>`` and ``<input>.tgt.after.<date>``.

    Exits with status 1 (after printing the help text) when no input file
    is given. A ``--date`` or ``--begin_date`` value that is not in
    ``YYYY-MM-DD`` form is reported through ``parser.error`` (exit status 2)
    instead of surfacing as an unhandled ``ValueError``.
    """
    usage = "Usage: python tmx_to_plain.py [options]"
    parser = argparse.ArgumentParser(usage=usage)
    parser.add_argument("-i", "--input", help="input tmx file")
    parser.add_argument("-d", "--date", help="date for splitting the output")
    parser.add_argument("-b", "--begin_date", help="earliest date (lastusage) to retain data")
    args = parser.parse_args()

    if args.input is None:
        parser.print_help()
        sys.exit(1)

    def parse_date(value, option):
        """Convert a YYYY-MM-DD string to a date, or exit with a usage error."""
        try:
            return datetime.datetime.strptime(value, '%Y-%m-%d').date()
        except ValueError:
            parser.error(f"invalid {option} value {value!r}: expected YYYY-MM-DD")

    src_out = args.input + ".src"
    tgt_out = args.input + ".tgt"

    date = None
    src_out_after = None
    tgt_out_after = None
    if args.date is not None:
        date = parse_date(args.date, "--date")
        # The raw option string (not the parsed date) is used as the suffix.
        src_out_after = src_out + ".after." + args.date
        tgt_out_after = tgt_out + ".after." + args.date

    begin_date = None
    if args.begin_date is not None:
        begin_date = parse_date(args.begin_date, "--begin_date")

    extract_from_tmx(args.input, src_out, tgt_out, begin_date, date, src_out_after, tgt_out_after)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|