summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Patrick Simianer <patrick@lilt.com> 2020-03-09 17:48:55 +0000
committer Patrick Simianer <patrick@lilt.com> 2020-03-09 17:48:55 +0000
commit 7d2fd2bf643671377e990b1c944aa3650397e3da (patch)
tree 13e2b7b00d122a90988231f60b74945587ad86f6
parent 5178c4f31dd3b8eb1f1cba2b632863f8a92af029 (diff)
de-sgm: match more stuff
-rwxr-xr-x de-sgm 11
1 files changed, 7 insertions, 4 deletions
diff --git a/de-sgm b/de-sgm
index 452edfe..fd4546e 100755
--- a/de-sgm
+++ b/de-sgm
@@ -1,10 +1,13 @@
#!/bin/sh
-egrep -v "^[[:space:]]*(<\?xml.*\?>|</?(mteval|doc|srcset|refset|translator|reviewer)[^>]*>)[[:space:]]*$" \
- | egrep -v "^[[:space:]]*<(url|description|keywords|talkid|title|translator|reviewer)[^>]*>.*</(url|description|keywords|talkid|title|translator|reviewer)>[[:space:]]*$" \
+egrep -v -i "^[[:space:]]*(<\?xml.*\?>|</?(mteval|doc|srcset|refset|translator|reviewer)[^>]*>)[[:space:]]*$" \
+ | egrep -v -i "^[[:space:]]*<(url|description|keywords|talkid|title|translator|reviewer)[^>]*>.*</(url|description|keywords|talkid|title|translator|reviewer)>[[:space:]]*$" \
| sed "s|<seg[^>]*>\s*||" \
| sed "s|\s*</seg>\s*$||" \
- | egrep -v "^[[:space:]]*<p>[[:space:]]*$|^[[:space:]]*</p>[[:space:]]*$" \
+ | egrep -v -i "^[[:space:]]*<p>[[:space:]]*$|^[[:space:]]*</p>[[:space:]]*$" \
| sed "s|<speaker>\s*||" \
- | sed "s|\s*</speaker>\s*$||"
+ | sed "s|\s*</speaker>\s*$||" \
+ | sed "s|\s*<hl>\s*$||" \
+ | sed "s|\s*</hl>\s*$||" \
+ | grep -v -P "^\s*$"