diff options
Diffstat (limited to 'sa-extract')
| -rwxr-xr-x | sa-extract/sa2cdec.py | 19 | 
1 files changed, 19 insertions, 0 deletions
#!/usr/bin/env python
"""sa2cdec.py: attach feature names to the positional feature values of rules.

Usage: sa2cdec.py FEATURE_NAMES_FILE < rules_in > rules_out

Each input line is expected to have the form

    lhs ||| src ||| tgt ||| v1 v2 ... ||| align

(the trailing alignment field is optional).  The feature values are paired,
in order, with the names read from FEATURE_NAMES_FILE and the line is
re-emitted as

    lhs ||| src ||| tgt ||| name1=v1 name2=v2 ... ||| align

NOTE(review): going by the file name this presumably converts sa-extract
grammar output into cdec's named-feature format -- confirm with the callers.
"""
import sys

FIELD_SEP = ' ||| '


def read_feature_names(path):
    """Return the feature names listed in *path*, one per line.

    Lines that start with '#' are skipped.  Every other line (blank lines
    included) contributes its whitespace-stripped text, so the position of a
    name in the file is what ties it to a feature value.
    """
    # The context manager closes the file deterministically.
    with open(path) as handle:
        return [line.strip() for line in handle if not line.startswith('#')]


def convert_rule(line, feat_names, stderr=None):
    """Convert one rule line to named-feature form and return it (no newline).

    line       -- raw input line, with or without its trailing newline.
    feat_names -- feature names, positionally matched to the values.
    stderr     -- stream for diagnostics; defaults to sys.stderr.

    A line with only four fields gets an empty alignment field and triggers a
    warning.  Any other field count is reported and raises ValueError.
    If the number of names and values differ, the extra ones are dropped
    (zip stops at the shorter sequence).
    """
    if stderr is None:
        stderr = sys.stderr
    # Echo the line without its newline so diagnostics do not leave a stray
    # blank line behind them.
    shown = line.rstrip('\n')

    try:
        # Only the newline is removed here so that an empty trailing
        # alignment field ("... ||| ") still counts as a fifth field.
        lhs, src, tgt, feats, align = line.strip('\n').split(FIELD_SEP)
    except ValueError:
        # Unpacking raises ValueError when the field count is wrong; nothing
        # else is expected, so nothing else is caught.
        stderr.write('WARNING: No alignments: ' + shown + '\n')
        try:
            lhs, src, tgt, feats = line.strip().split(FIELD_SEP)
            align = ''
        except ValueError:
            stderr.write("ERROR: Malformed line: " + shown + '\n')
            raise

    feat_values = feats.split(' ')
    named_feats = ' '.join(
        name + "=" + value for name, value in zip(feat_names, feat_values)
    )
    return FIELD_SEP.join((lhs, src, tgt, named_feats, align))


def main(argv=None, stdin=None, stdout=None, stderr=None):
    """Script entry point: convert every rule on stdin and write it to stdout.

    All parameters default to the corresponding ``sys`` objects; they exist
    so the conversion can be driven with in-memory streams.
    """
    argv = sys.argv if argv is None else argv
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout
    stderr = sys.stderr if stderr is None else stderr

    if len(argv) < 2:
        # Exit with a readable message rather than an IndexError traceback.
        raise SystemExit('usage: sa2cdec.py FEATURE_NAMES_FILE < rules > named_rules')

    feat_names = read_feature_names(argv[1])
    for line in stdin:
        stdout.write(convert_rule(line, feat_names, stderr) + '\n')


if __name__ == '__main__':
    main()
