summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Simianer <patrick@lilt.com> 2020-02-03 15:23:47 +0000
committer: Patrick Simianer <patrick@lilt.com> 2020-02-03 15:23:47 +0000
commit: 53c3d328dbe9a56b54f6d7dc51491ecf92081fef (patch)
tree: 7fecbeed405f4be607c4b6e13e99c2f837305c60
parent: a8b1a9e3923c315a1ade856b0610f92489611cd5 (diff)
de-sgm
-rwxr-xr-x de-sgm 7
1 files changed, 5 insertions, 2 deletions
diff --git a/de-sgm b/de-sgm
index 0b9177d..452edfe 100755
--- a/de-sgm
+++ b/de-sgm
@@ -1,7 +1,10 @@
#!/bin/sh
-
egrep -v "^[[:space:]]*(<\?xml.*\?>|</?(mteval|doc|srcset|refset|translator|reviewer)[^>]*>)[[:space:]]*$" \
| egrep -v "^[[:space:]]*<(url|description|keywords|talkid|title|translator|reviewer)[^>]*>.*</(url|description|keywords|talkid|title|translator|reviewer)>[[:space:]]*$" \
- | sed "s|<seg[^>]*>\s*||" | sed "s|\s*</seg>$||" | egrep -v "^[[:space:]]*<p>[[:space:]]*$|^[[:space:]]*</p>[[:space:]]*$"
+ | sed "s|<seg[^>]*>\s*||" \
+ | sed "s|\s*</seg>\s*$||" \
+ | egrep -v "^[[:space:]]*<p>[[:space:]]*$|^[[:space:]]*</p>[[:space:]]*$" \
+ | sed "s|<speaker>\s*||" \
+ | sed "s|\s*</speaker>\s*$||"