diff options
Diffstat (limited to 'corpus/support/utf8-normalize.sh')
-rwxr-xr-x | corpus/support/utf8-normalize.sh | 26 |
1 files changed, 16 insertions, 10 deletions
diff --git a/corpus/support/utf8-normalize.sh b/corpus/support/utf8-normalize.sh
index c85ae9f7..af9895ba 100755
--- a/corpus/support/utf8-normalize.sh
+++ b/corpus/support/utf8-normalize.sh
@@ -25,13 +25,19 @@ else
   fi
 fi
 
-perl -e '$|++; while(<>){s/\r\n*/\n/g; print;}' | $CMD | /usr/bin/perl -w -e '
-  $|++;
-  while (<>) {
-    chomp;
-    s/[\x00-\x1F]+/ /g;
-    s/ +/ /g;
-    s/^ //;
-    s/ $//;
-    print "$_\n";
-  }'
+if [[ $# == 1 && $1 == "--batchline" ]]; then
+  perl $(dirname $0)/utf8-normalize-batch.pl "$CMD"
+else
+  perl -e '$|++; while(<>){s/\r\n*/\n/g; print;}' \
+  |$CMD \
+  |/usr/bin/perl -w -e '
+    $|++;
+    while (<>) {
+      chomp;
+      s/[\x00-\x1F]+/ /g;
+      s/ +/ /g;
+      s/^ //;
+      s/ $//;
+      print "$_\n";
+    }'
+fi