blob: fd4546ecff3646144f4e3120b639813ff49df32e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
|
#!/bin/sh
# Filter: strips XML markup from a line-oriented XML document read on
# standard input and writes the remaining text lines to standard output.
#
# Usage: <this-script> < input.xml > output.txt
#
# Processing order (each rule works on one line at a time):
#   1. Drop lines consisting solely of an XML declaration or a single
#      opening/closing mteval, doc, srcset, refset, translator or reviewer
#      tag (case-insensitive).
#   2. Drop lines consisting solely of one complete url, description,
#      keywords, talkid, title, translator or reviewer element
#      (case-insensitive).
#   3. Remove the first <seg ...> opening tag (plus following whitespace) and
#      a </seg> closing tag at the end of the line (plus surrounding
#      whitespace).
#   4. Drop lines consisting solely of <p> or </p> (case-insensitive).
#   5. Remove <speaker> / </speaker> the same way as <seg> / </seg>.
#   6. Remove <hl> / </hl>.  The opening tag is removed both when it ends the
#      line and when text follows it, so "<hl>text</hl>" becomes "text".
#   7. Drop lines that are empty or whitespace-only.
#
# Only POSIX grep/sed features are used ("grep -E" instead of the
# obsolescent "egrep", "[[:space:]]" instead of the GNU-only "\s" and
# "grep -P").
#
# Exit status: that of the final grep, i.e. 0 if at least one line was
# written and 1 if the output is empty.
strip_xml() {
  grep -E -v -i \
    -e '^[[:space:]]*(<\?xml.*\?>|</?(mteval|doc|srcset|refset|translator|reviewer)[^>]*>)[[:space:]]*$' \
    -e '^[[:space:]]*<(url|description|keywords|talkid|title|translator|reviewer)[^>]*>.*</(url|description|keywords|talkid|title|translator|reviewer)>[[:space:]]*$' \
    | sed \
      -e 's|<seg[^>]*>[[:space:]]*||' \
      -e 's|[[:space:]]*</seg>[[:space:]]*$||' \
      -e '/^[[:space:]]*<[pP]>[[:space:]]*$/d' \
      -e '/^[[:space:]]*<\/[pP]>[[:space:]]*$/d' \
      -e 's|<speaker>[[:space:]]*||' \
      -e 's|[[:space:]]*</speaker>[[:space:]]*$||' \
      -e 's|[[:space:]]*<hl>[[:space:]]*$||' \
      -e 's|<hl>[[:space:]]*||' \
      -e 's|[[:space:]]*</hl>[[:space:]]*$||' \
    | grep -v '^[[:space:]]*$'
}

strip_xml
|