summaryrefslogtreecommitdiff
path: root/corpus/support/utf8-normalize-batch.pl
blob: e574f861a06a872cce652e7c14bdb76f73f88819 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env perl
#
# Batch driver: for every line read from STDIN, start a fresh run of CMD,
# send the line to its standard input, read back the FIRST line it writes,
# and print that line to STDOUT after squeezing out control characters and
# redundant blanks. Exactly one output line is produced per input line.
#
# usage: <this script> "CMD"
#
# Exit status 2 is returned when the argument count is wrong.

use strict;
use warnings;

use IPC::Open2;

# Unbuffer STDOUT so each result line is visible as soon as it is produced.
$| = 1;

if (@ARGV != 1) {
    print STDERR "usage: $0 \"CMD\"\n";
    exit(2);
}

# CMD is handed to open2 as one single string, exactly as given.
my $cmd = $ARGV[0];

while (my $line = <STDIN>) {
    # Replace every CR, together with any LFs directly after it, by one LF.
    $line =~ s/\r\n*/\n/g;

    # One child per input line; lexical handles are released every iteration.
    # open2 raises an exception by itself when the child cannot be started.
    my $pid = open2(my $from_child, my $to_child, $cmd);

    # NOTE: $line normally still carries its own newline, so the child sees
    # the text followed by an empty line. Kept as-is on purpose, because the
    # protocol CMD expects is not visible from this script.
    print {$to_child} "$line\n";
    close($to_child);    # signals end-of-input to the child

    # Only the first line of the child's output is used.
    my $reply = <$from_child>;
    close($from_child);
    waitpid($pid, 0);    # reap the child so no zombies pile up

    # A child that wrote nothing yields undef; emit an empty line for it so
    # input and output stay aligned line by line.
    $reply = '' unless defined $reply;

    chomp $reply;
    $reply =~ s/[\x00-\x1F]+/ /g;    # runs of control characters -> one blank
    $reply =~ s/  +/ /g;             # collapse repeated blanks
    $reply =~ s/^ //;                # drop the single leading blank, if any
    $reply =~ s/ $//;                # drop the single trailing blank, if any
    print "$reply\n";
}