summary refs log tree commit diff
path: root/tmx-extract.py
diff options
context:
space:
mode:
author Patrick Simianer <pks@pks.rocks> 2020-08-12 07:32:06 +0200
committer Patrick Simianer <pks@pks.rocks> 2020-08-12 07:32:06 +0200
commit 64e8bdba930479249b8dfbc4b5d4b659a95433f0 (patch)
tree e26969b03d8380ee8d2cbc1328f851772006133c /tmx-extract.py
parent 74e20e00dfbffdcf117778049e47acd79e320110 (diff)
parent 4732fb3be94ba3f88b18295cf1c00e8c616eec73 (diff)
Merge branch 'master' of ssh://github.com/pks/nlp_scripts
Diffstat (limited to 'tmx-extract.py')
-rwxr-xr-x tmx-extract.py 30
1 files changed, 15 insertions, 15 deletions
diff --git a/tmx-extract.py b/tmx-extract.py
index 90a298a..00f18f5 100755
--- a/tmx-extract.py
+++ b/tmx-extract.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/python3
#
# Adapted from Apertium
# http://wiki.apertium.org/wiki/Tools_for_TMX
@@ -54,23 +54,23 @@ class TMXHandler(ContentHandler):
def endElement(self, name):
if name == 'tu' and self.pair == self.cur_pair:
for lang in self.cur_pair:
- self.files[lang].write(self.seg[lang].encode('utf-8').strip()+"\n")
+ self.files[lang].write("{}\n".format(self.seg[lang].replace("\n", " ").strip()))
-parser = make_parser()
-if len(sys.argv) < 3:
- print 'Usage: tmx-extract.py <file> <slang> <tlang>'
- print ''
- sys.exit(-1)
+if __name__ == "__main__":
+ parser = make_parser()
-sfile = open(sys.argv[1]+"."+sys.argv[2], 'w+')
-tfile = open(sys.argv[1]+"."+sys.argv[3], 'w+')
-curHandler = TMXHandler(sys.argv[2], sys.argv[3], sfile, tfile)
+ if len(sys.argv) < 3:
+ print('Usage: tmx-extract.py <file> <slang> <tlang>')
+ print('')
+ sys.exit(-1)
-parser.setContentHandler(curHandler)
+ sfile_path = sys.argv[1] + "." + sys.argv[2]
+ tfile_path = sys.argv[1] + "." + sys.argv[3]
-parser.parse(open(sys.argv[1]))
-
-sfile.close()
-tfile.close()
+ with open(sfile_path, 'w+') as sfile, open(tfile_path, 'w+') as tfile:
+ curHandler = TMXHandler(sys.argv[2], sys.argv[3], sfile, tfile)
+ parser.setContentHandler(curHandler)
+ with open(sys.argv[1], 'r') as tmx_file:
+ parser.parse(tmx_file)