#!/usr/bin/env perl
#
# utf8-normalize-batch.pl -- run a filter command once per input line.
#
# Provenance: corpus/support/utf8-normalize-batch.pl, added in commit
# c55f37fee9f43c0a13b47aac512804ecf9f5bd48 (Michael Denkowski, 2013-09-05):
# "Slower but correct (wrt buffered) unbuffered version."
#
# Usage:
#     utf8-normalize-batch.pl "CMD" < input > output
#
# For every line read from STDIN this script:
#   1. starts a fresh CMD process,
#   2. writes the line to the child's standard input and closes it,
#   3. reads back the FIRST line the child prints,
#   4. replaces runs of ASCII control characters with a space, collapses
#      repeated spaces, and trims a leading/trailing space,
#   5. prints the cleaned result followed by a newline.
#
# Exit status is 2 when the script is not given exactly one argument.

use strict;
use warnings;

use IPC::Open2;

# Autoflush STDOUT so every result line is written out immediately.
$| = 1;

if (scalar(@ARGV) != 1) {
    print STDERR "usage: $0 \"CMD\"\n";
    exit(2);
}

my $cmd = $ARGV[0];

while (my $line = <STDIN>) {
    # Fold each carriage return, plus any newlines right after it, into a
    # single newline.
    $line =~ s/\r\n*/\n/g;

    # One child process per input line; open2 fills in both lexical handles.
    my $pid = open2(my $from_child, my $to_child, $cmd);

    # NOTE(review): $line normally still ends with its own newline, so the
    # child receives the text followed by an empty line, and only the first
    # line of its output is read back below.  Presumably the extra "\n" is
    # there to terminate a final input line that lacks one -- confirm before
    # changing it.
    print {$to_child} "$line\n";
    close($to_child);    # signals end-of-input to the child

    my $result = <$from_child>;
    close($from_child);
    waitpid($pid, 0);    # reap the child before starting the next one

    # A child that printed nothing yields undef; treat it as an empty line
    # so the output still has exactly one line per input line.
    $result = '' unless defined $result;
    chomp $result;

    $result =~ s/[\x00-\x1F]+/ /g;    # control characters (tabs included) -> space
    $result =~ s/ +/ /g;              # collapse runs of spaces
    $result =~ s/^ //;                # at most one leading space remains
    $result =~ s/ $//;                # at most one trailing space remains

    print "$result\n";
}