summaryrefslogtreecommitdiff
path: root/tmx-to-plain
diff options
context:
space:
mode:
authorPatrick Simianer <patrick@lilt.com>2026-02-26 10:05:59 +0000
committerPatrick Simianer <patrick@lilt.com>2026-02-26 10:05:59 +0000
commitb31ace79ea5f6b3f279c544cd3a443d6fbf2a24d (patch)
tree31f2b599fa5f6996aeb134390d58deb63eefe04a /tmx-to-plain
parent8805e95ae94d798c6441f7e1b72c90e049563f17 (diff)
overhaulHEADmaster
Diffstat (limited to 'tmx-to-plain')
-rwxr-xr-xtmx-to-plain95
1 files changed, 95 insertions, 0 deletions
diff --git a/tmx-to-plain b/tmx-to-plain
new file mode 100755
index 0000000..025d6e4
--- /dev/null
+++ b/tmx-to-plain
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
import argparse
import contextlib
import datetime
import sys

from translate.storage.tmx import tmxfile
+
+
def _unit_date(node, attribute):
    """Return the date held in *attribute* of the unit's target element.

    Only the first eight characters of the attribute value are parsed, and
    they must form a ``YYYYMMDD`` date.
    """
    # NOTE(review): presumably ``.get`` yields None when the attribute is
    # absent, in which case the slice below fails -- confirm whether such
    # units should be skipped or kept instead.
    stamp = node.get_target_dom().get(attribute)[:8]
    return datetime.datetime.strptime(stamp, "%Y%m%d").date()


def _single_line(text):
    """Return *text* as a string with newlines turned into spaces and CRs removed."""
    return f"{text}".replace("\n", " ").replace("\r", "")


def extract_from_tmx(tmx_file_path,
                     src_out_path,
                     tgt_out_path,
                     begin_date,
                     date,
                     src_out_after,
                     tgt_out_after):
    """Write the source and target text of each TMX unit to parallel text files.

    Every retained unit contributes exactly one line to a source file and one
    line to a target file, so the two files stay line-aligned.

    Args:
        tmx_file_path: Path of the TMX file to read.
        src_out_path: Path of the file receiving source segments.
        tgt_out_path: Path of the file receiving target segments.
        begin_date: Optional ``datetime.date``. Units whose target
            ``lastusagedate`` is earlier than this are skipped.
        date: Optional ``datetime.date``. Units whose target ``changedate``
            is later than this go to the "after" files instead of the main
            output files.
        src_out_after: Path of the "after" source file, or ``None``.
        tgt_out_after: Path of the "after" target file, or ``None``.

    Raises:
        ValueError: If ``date`` is given without both "after" paths.
    """
    split_outputs = src_out_after is not None and tgt_out_after is not None
    if date is not None and not split_outputs:
        # Without this check the loop would fail later on an unbound file
        # handle, after output files had already been created.
        raise ValueError(
            "src_out_after and tgt_out_after are required when date is given"
        )

    with open(tmx_file_path, "rb") as in_fp:
        tmx_file = tmxfile(in_fp)

    # ExitStack closes every file that was opened, including the optional
    # "after" files, even when an exception interrupts the loop.
    with contextlib.ExitStack() as stack:

        def open_out(path):
            # UTF-8 is forced so the output does not depend on the locale.
            return stack.enter_context(open(path, "w", encoding="utf-8"))

        src_after_fp = tgt_after_fp = None
        if split_outputs:
            src_after_fp = open_out(src_out_after)
            tgt_after_fp = open_out(tgt_out_after)
        src_out_fp = open_out(src_out_path)
        tgt_out_fp = open_out(tgt_out_path)

        for index, node in enumerate(tmx_file.unit_iter()):
            if begin_date is not None and _unit_date(node, "lastusagedate") < begin_date:
                continue

            src_fp, tgt_fp = src_out_fp, tgt_out_fp
            if date is not None and _unit_date(node, "changedate") > date:
                src_fp, tgt_fp = src_after_fp, tgt_after_fp

            src_fp.write(f"{_single_line(node.source)}\n")
            tgt_fp.write(f"{_single_line(node.target)}\n")

            # The counter is the unit index, so skipped units are counted too.
            if (index + 1) % 1000 == 0:
                sys.stdout.write(f"Processed {index + 1} lines\r")
                sys.stdout.flush()
+
+
def _parse_cli_date(parser, flag, value):
    """Parse a ``YYYY-MM-DD`` option value, exiting with a usage error if invalid.

    Returns ``None`` when the option was not supplied.
    """
    if value is None:
        return None
    try:
        return datetime.datetime.strptime(value, "%Y-%m-%d").date()
    except ValueError:
        # parser.error prints the usage line plus this message and exits
        # with status 2, instead of surfacing a raw traceback.
        parser.error(f"argument {flag}: invalid date {value!r}, expected YYYY-MM-DD")


def main():
    """Parse the command line and run the TMX extraction.

    Output paths are derived from the input path: ``<input>.src`` and
    ``<input>.tgt``, plus ``<input>.src.after.<date>`` and
    ``<input>.tgt.after.<date>`` when ``--date`` is given. Prints the help
    text and exits with status 1 when no input file is supplied.
    """
    # argparse adds its own "usage: " prefix, so the template must not
    # repeat it.
    parser = argparse.ArgumentParser(usage="%(prog)s [options]")
    parser.add_argument("-i", "--input", help="input tmx file")
    parser.add_argument("-d", "--date", help="date for splitting the output")
    parser.add_argument("-b", "--begin_date", help="earliest date (lastusage) to retain data")

    args = parser.parse_args()

    if args.input is None:
        parser.print_help()
        sys.exit(1)

    # Validate both dates before any file is opened.
    date = _parse_cli_date(parser, "-d/--date", args.date)
    begin_date = _parse_cli_date(parser, "-b/--begin_date", args.begin_date)

    src_out = f"{args.input}.src"
    tgt_out = f"{args.input}.tgt"

    if date is not None:
        # The suffix uses the date exactly as typed on the command line.
        src_out_after = f"{src_out}.after.{args.date}"
        tgt_out_after = f"{tgt_out}.after.{args.date}"
    else:
        src_out_after = None
        tgt_out_after = None

    extract_from_tmx(args.input, src_out, tgt_out, begin_date, date, src_out_after, tgt_out_after)
+
+
+if __name__ == "__main__":  # run the command-line entry point only when executed as a script
+ main()