summary | refs | log | tree | commit | diff
path: root/corpus/support/utf8-normalize-batch.pl
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/support/utf8-normalize-batch.pl')
-rwxr-xr-x corpus/support/utf8-normalize-batch.pl 28
1 files changed, 28 insertions, 0 deletions
diff --git a/corpus/support/utf8-normalize-batch.pl b/corpus/support/utf8-normalize-batch.pl
new file mode 100755
index 00000000..e574f861
--- /dev/null
+++ b/corpus/support/utf8-normalize-batch.pl
@@ -0,0 +1,28 @@
+#!/usr/bin/env perl
+#
+# Usage: utf8-normalize-batch.pl "CMD"
+#
+# Reads lines from STDIN.  For every line a fresh CMD process is started
+# with IPC::Open2, the line is written to the child's standard input, and
+# the first line of the child's standard output is read back.  That reply
+# is flattened before being printed to STDOUT:
+#
+#   * each run of ASCII control characters (0x00-0x1F) becomes one space,
+#   * runs of spaces are collapsed to a single space,
+#   * a leading and a trailing space are removed.
+#
+# Exactly one output line is printed per input line.  The script exits
+# with status 2 when it is not given exactly one argument.
+
+use strict;
+use warnings;
+
+use IPC::Open2;
+
+# Unbuffer STDOUT so every result is emitted as soon as it is ready.
+$| = 1;
+
+if (@ARGV != 1) {
+    print STDERR "usage: $0 \"CMD\"\n";
+    exit(2);
+}
+
+# CMD is passed to open2 as one single string.
+my $cmd = $ARGV[0];
+
+while (my $line = <STDIN>) {
+    # Turn every CR, together with any LFs directly after it, into one LF.
+    $line =~ s/\r\n*/\n/g;
+
+    # One child per input line.  open2 raises an exception by itself when
+    # the child cannot be started, so no return-value check is needed.
+    my $pid = open2(my $from_child, my $to_child, $cmd);
+
+    # $line normally still ends in its own LF, so the child receives the
+    # text followed by one extra empty line.  This is kept on purpose so
+    # the bytes handed to CMD stay exactly what they have always been.
+    print {$to_child} "$line\n";
+    close($to_child);    # EOF on the child's stdin lets it finish
+
+    # Only the first line of the child's output is used.
+    my $reply = <$from_child>;
+    close($from_child);
+    waitpid($pid, 0);    # reap the child so no zombies pile up
+
+    # A child that wrote nothing yields undef; treat that as an empty
+    # reply so an empty line is still printed for this input line.
+    $reply = '' unless defined $reply;
+
+    chomp $reply;
+    $reply =~ s/[\x00-\x1F]+/ /g;
+    $reply =~ s/ +/ /g;
+    $reply =~ s/^ //;
+    $reply =~ s/ $//;
+    print "$reply\n";
+}