summaryrefslogtreecommitdiff
path: root/bitext2tmx
blob: e9c8e23fa883670ace49d6e2b579aa7811afa9ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python3

import sys
from xml.sax.saxutils import escape


# Opening part of the TMX document. ``srclang`` is filled in per run so that the
# header agrees with the xml:lang of the source <tuv> elements.
_TMX_PREFIX_TEMPLATE = """<tmx version="1.4">
  <header
    creationtool="bitext2tmx" creationtoolversion="1.0"
    datatype="PlainText" segtype="sentence"
    adminlang="en-us" srclang="{src_lang}"
    o-tmf="ABCTransMem"/>
  <body>"""

# One translation unit. The leading newline is kept on purpose: together with
# the "\n" join below it reproduces the blank line the script has always
# emitted before each <tu>.
_TU_TEMPLATE = """
    <tu>
      <tuv xml:lang="{src_lang}">
        <seg>{src_seg}</seg>
      </tuv>
      <tuv xml:lang="{tgt_lang}">
        <seg>{tgt_seg}</seg>
      </tuv>
    </tu>"""

_TMX_SUFFIX = """  </body>
</tmx>"""

# escape() only handles &, < and > by default; attribute values are written
# inside double quotes, so the double quote must be escaped as well.
_ATTR_ENTITIES = {'"': "&quot;"}


def lang_from_path(path):
    """Return the language tag of *path*: the text after its last ".".

    If *path* contains no "." the whole string is returned unchanged.
    """
    return path.split(".")[-1]


def build_tmx(src_lines, tgt_lines, src_lang, tgt_lang):
    """Return a TMX 1.4 document pairing *src_lines* with *tgt_lines*.

    Lines are paired positionally; pairing stops at the end of the shorter
    input. A single trailing run of "\\n" is stripped from every line and the
    remaining text is XML-escaped before being placed in a <seg>.

    *src_lang* and *tgt_lang* are used as the xml:lang of the two <tuv>
    elements, and *src_lang* is also written to the header's srclang.
    """
    src_attr = escape(src_lang, _ATTR_ENTITIES)
    tgt_attr = escape(tgt_lang, _ATTR_ENTITIES)

    units = [
        _TU_TEMPLATE.format(
            src_lang=src_attr,
            tgt_lang=tgt_attr,
            src_seg=escape(src_line.rstrip("\n")),
            tgt_seg=escape(tgt_line.rstrip("\n")),
        )
        for src_line, tgt_line in zip(src_lines, tgt_lines)
    ]

    prefix = _TMX_PREFIX_TEMPLATE.format(src_lang=src_attr)
    return "\n".join([prefix, *units, _TMX_SUFFIX])


def main(argv=None):
    """Convert two line-aligned text files to TMX and print it to stdout.

    *argv* defaults to ``sys.argv``; ``argv[1]`` is the source file and
    ``argv[2]`` the target file. The language of each side is taken from the
    file name's extension. Extra arguments are ignored.

    Exits with a usage message when fewer than two file arguments are given.
    Writes a warning to stderr when the files have different line counts; the
    unpaired trailing lines are then left out of the output.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        prog = argv[0] if argv else "bitext2tmx"
        sys.exit(f"usage: {prog} SRC_FILE TGT_FILE")

    src_path, tgt_path = argv[1], argv[2]

    # Decode explicitly as UTF-8 instead of relying on the platform locale.
    with open(src_path, "r", encoding="utf-8") as src_file, \
            open(tgt_path, "r", encoding="utf-8") as tgt_file:
        src_lines = src_file.readlines()
        tgt_lines = tgt_file.readlines()

    if len(src_lines) != len(tgt_lines):
        print(
            f"warning: {src_path} has {len(src_lines)} lines but {tgt_path} "
            f"has {len(tgt_lines)}; unpaired trailing lines are dropped",
            file=sys.stderr,
        )

    # The document carries no XML declaration, so consumers will read it as
    # UTF-8; make stdout match where the stream supports it.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(encoding="utf-8")

    print(build_tmx(src_lines, tgt_lines, lang_from_path(src_path), lang_from_path(tgt_path)))


if __name__ == "__main__":
    main()