diff options
author | Patrick Simianer <pks@pks.rocks> | 2020-08-12 07:32:06 +0200 |
---|---|---|
committer | Patrick Simianer <pks@pks.rocks> | 2020-08-12 07:32:06 +0200 |
commit | 64e8bdba930479249b8dfbc4b5d4b659a95433f0 (patch) | |
tree | e26969b03d8380ee8d2cbc1328f851772006133c /tmx-extract.py | |
parent | 74e20e00dfbffdcf117778049e47acd79e320110 (diff) | |
parent | 4732fb3be94ba3f88b18295cf1c00e8c616eec73 (diff) |
Merge branch 'master' of ssh://github.com/pks/nlp_scripts
Diffstat (limited to 'tmx-extract.py')
-rwxr-xr-x | tmx-extract.py | 30 |
1 files changed, 15 insertions, 15 deletions
```diff
diff --git a/tmx-extract.py b/tmx-extract.py
index 90a298a..00f18f5 100755
--- a/tmx-extract.py
+++ b/tmx-extract.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/python3
 #
 # Adapted from Apertium
 # http://wiki.apertium.org/wiki/Tools_for_TMX
@@ -54,23 +54,23 @@ class TMXHandler(ContentHandler):
     def endElement(self, name):
         if name == 'tu' and self.pair == self.cur_pair:
             for lang in self.cur_pair:
-                self.files[lang].write(self.seg[lang].encode('utf-8').strip()+"\n")
+                self.files[lang].write("{}\n".format(self.seg[lang].replace("\n", " ").strip()))
 
 
 
-parser = make_parser()
-if len(sys.argv) < 3:
-    print 'Usage: tmx-extract.py <file> <slang> <tlang>'
-    print ''
-    sys.exit(-1)
+if __name__ == "__main__":
+    parser = make_parser()
 
-sfile = open(sys.argv[1]+"."+sys.argv[2], 'w+')
-tfile = open(sys.argv[1]+"."+sys.argv[3], 'w+')
-curHandler = TMXHandler(sys.argv[2], sys.argv[3], sfile, tfile)
+    if len(sys.argv) < 3:
+        print('Usage: tmx-extract.py <file> <slang> <tlang>')
+        print('')
+        sys.exit(-1)
 
-parser.setContentHandler(curHandler)
+    sfile_path = sys.argv[1] + "." + sys.argv[2]
+    tfile_path = sys.argv[1] + "." + sys.argv[3]
 
-parser.parse(open(sys.argv[1]))
-
-sfile.close()
-tfile.close()
+    with open(sfile_path, 'w+') as sfile, open(tfile_path, 'w+') as tfile:
+        curHandler = TMXHandler(sys.argv[2], sys.argv[3], sfile, tfile)
+        parser.setContentHandler(curHandler)
+        with open(sys.argv[1], 'r') as tmx_file:
+            parser.parse(tmx_file)
```