summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x tmx-extract.py 90
1 files changed, 45 insertions, 45 deletions
diff --git a/tmx-extract.py b/tmx-extract.py
index 20e4bac..90a298a 100755
--- a/tmx-extract.py
+++ b/tmx-extract.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python2
#
# Adapted from Apertium
# http://wiki.apertium.org/wiki/Tools_for_TMX
@@ -11,57 +11,57 @@ import sys
import codecs
class TMXHandler(ContentHandler):
- def __init__ (self, slang, tlang, sfile, tfile):
- self.pair = set([slang, tlang])
- self.files = {}
- self.files[slang] = sfile
- self.files[tlang] = tfile
- self.inTag = ''
- self.note = ''
- self.tuid = ''
- self.type = ''
- self.cur_pair = set()
- self.cur_lang = ''
- self.seg = {}
- self.seg[slang] = ''
- self.seg[tlang] = ''
-
- def startElement(self, name, attrs):
+ def __init__ (self, slang, tlang, sfile, tfile):
+ self.pair = set([slang, tlang])
+ self.files = {}
+ self.files[slang] = sfile
+ self.files[tlang] = tfile
+ self.inTag = ''
+ self.note = ''
+ self.tuid = ''
+ self.type = ''
+ self.cur_pair = set()
+ self.cur_lang = ''
+ self.seg = {}
+ self.seg[slang] = ''
+ self.seg[tlang] = ''
- if name == 'tu':
- self.cur_pair = set();
- self.inTag = 'tu'
- self.tuid = attrs.get('tuid','')
- self.type = attrs.get('datatype','')
- elif name == 'note':
- self.inTag = 'note'
- self.note = ""
- elif name == 'tuv':
- self.inTag = 'tuv'
- self.cur_lang = attrs.get('xml:lang', '')
- self.cur_pair.add(self.cur_lang)
- elif name == 'seg':
- self.inTag = 'seg'
- if self.cur_lang in self.pair:
- self.seg[self.cur_lang] = ''
+ def startElement(self, name, attrs):
- def characters (self, c):
- if self.inTag == 'note':
- self.note += c
- elif self.inTag == 'seg' and self.cur_lang in self.pair:
- self.seg[self.cur_lang] += c
+ if name == 'tu':
+ self.cur_pair = set()
+ self.inTag = 'tu'
+ self.tuid = attrs.get('tuid','')
+ self.type = attrs.get('datatype','')
+ elif name == 'note':
+ self.inTag = 'note'
+ self.note = ""
+ elif name == 'tuv':
+ self.inTag = 'tuv'
+ self.cur_lang = attrs.get('xml:lang', '')
+ self.cur_pair.add(self.cur_lang)
+ elif name == 'seg':
+ self.inTag = 'seg'
+ if self.cur_lang in self.pair:
+ self.seg[self.cur_lang] = ''
- def endElement(self, name):
- if name == 'tu' and self.pair == self.cur_pair:
- for lang in self.cur_pair:
- self.files[lang].write(self.seg[lang].encode('utf-8').strip()+"\n")
+ def characters (self, c):
+ if self.inTag == 'note':
+ self.note += c
+ elif self.inTag == 'seg' and self.cur_lang in self.pair:
+ self.seg[self.cur_lang] += c
+
+ def endElement(self, name):
+ if name == 'tu' and self.pair == self.cur_pair:
+ for lang in self.cur_pair:
+ self.files[lang].write(self.seg[lang].encode('utf-8').strip()+"\n")
parser = make_parser()
if len(sys.argv) < 3:
- print 'Usage: tmx-extract.py <file> <slang> <tlang>'
- print ''
- sys.exit(-1)
+ print 'Usage: tmx-extract.py <file> <slang> <tlang>'
+ print ''
+ sys.exit(-1)
sfile = open(sys.argv[1]+"."+sys.argv[2], 'w+')
tfile = open(sys.argv[1]+"."+sys.argv[3], 'w+')