summary | refs | log | tree | commit | diff
path: root/corpus/support/utf8-normalize.sh
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/support/utf8-normalize.sh')
-rwxr-xr-x corpus/support/utf8-normalize.sh 26
1 files changed, 16 insertions, 10 deletions
diff --git a/corpus/support/utf8-normalize.sh b/corpus/support/utf8-normalize.sh
index c85ae9f7..af9895ba 100755
--- a/corpus/support/utf8-normalize.sh
+++ b/corpus/support/utf8-normalize.sh
@@ -25,13 +25,19 @@ else
fi
fi
-perl -e '$|++; while(<>){s/\r\n*/\n/g; print;}' | $CMD | /usr/bin/perl -w -e '
- $|++;
- while (<>) {
- chomp;
- s/[\x00-\x1F]+/ /g;
- s/ +/ /g;
- s/^ //;
- s/ $//;
- print "$_\n";
- }'
+if [[ $# == 1 && $1 == "--batchline" ]]; then
+ perl $(dirname $0)/utf8-normalize-batch.pl "$CMD"
+else
+ perl -e '$|++; while(<>){s/\r\n*/\n/g; print;}' \
+ |$CMD \
+ |/usr/bin/perl -w -e '
+ $|++;
+ while (<>) {
+ chomp;
+ s/[\x00-\x1F]+/ /g;
+ s/ +/ /g;
+ s/^ //;
+ s/ $//;
+ print "$_\n";
+ }'
+fi