summary | refs | log | tree | commit | diff
path: root/corpus/support/utf8-normalize.sh
diff options
context:
space:
mode:
author: Michael Denkowski <mdenkows@cs.cmu.edu> 2013-09-05 11:26:22 -0700
committer: Michael Denkowski <mdenkows@cs.cmu.edu> 2013-09-05 11:26:22 -0700
commit: 76c26e382a9d5e2c95064488f060107e95470055 (patch)
tree: 7baf7cb1fe7af91b8a4d5b320350607e6b94dd0f /corpus/support/utf8-normalize.sh
parent: 7525905ca57bbd425f96a33768e3f6777ba86f7a (diff)
Unbuffered mode, flush after each line where possible, skip otherwise
Diffstat (limited to 'corpus/support/utf8-normalize.sh')
-rwxr-xr-x corpus/support/utf8-normalize.sh 3
1 files changed, 2 insertions, 1 deletions
diff --git a/corpus/support/utf8-normalize.sh b/corpus/support/utf8-normalize.sh
index 2f347854..c85ae9f7 100755
--- a/corpus/support/utf8-normalize.sh
+++ b/corpus/support/utf8-normalize.sh
@@ -25,7 +25,8 @@ else
fi
fi
-perl -e 'while(<>){s/\r\n*/\n/g; print;}' | $CMD | /usr/bin/perl -w -e '
+perl -e '$|++; while(<>){s/\r\n*/\n/g; print;}' | $CMD | /usr/bin/perl -w -e '
+ $|++;
while (<>) {
chomp;
s/[\x00-\x1F]+/ /g;