blob: af9895ba0e2e52e213309dc757b710efc0dff0d9 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
|
#!/bin/bash
#
# Normalize UTF-8 text: reads stdin, writes the cleaned text to stdout.
#
# Usage:
#   <this script> < input > output
#   <this script> --batchline
#
# Default mode runs a three stage pipeline:
#   1. every CR (plus any LFs directly following it) becomes a single LF;
#   2. the text is passed through ICU `uconv` with the Any-NFKC transform,
#      or through `iconv -c` when no uconv can be found;
#   3. per line: runs of control characters (\x00-\x1F) become one space,
#      runs of spaces collapse to one, and one leading / one trailing space
#      is removed.
#
# With the single argument --batchline, the converter command line is handed
# as one string to utf8-normalize-batch.pl (expected next to this script).
#
# Environment:
#   ICU_DIR  optional ICU install prefix. Defaults to /usr0/tools/icu, which
#            is the location on malbec; on another machine ICU may be
#            installed in /usr or /usr/local.
#
# Exit status: 1 if the uconv under ICU_DIR exists but is not executable;
# otherwise the status of the last command that was run.

main() {
  local icu_dir=${ICU_DIR:-/usr0/tools/icu}
  local uconv_bin=${icu_dir}/bin/uconv
  local uconv_lib=${icu_dir}/lib
  local -a normalizer

  if [[ -e "$uconv_bin" && -d "$uconv_lib" ]]; then
    # Append the ICU lib dir. The ':' is only emitted when there is an
    # existing value, because an empty entry in LD_LIBRARY_PATH makes the
    # loader search the current directory.
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${uconv_lib}"
    if [[ ! -x "$uconv_bin" ]]; then
      echo "$0: Cannot execute $uconv_bin! Please fix." 1>&2
      # Fatal configuration error: report failure to the caller.
      exit 1
    fi
    normalizer=("$uconv_bin" -f utf8 -t utf8 -x Any-NFKC --callback skip)
  elif command -v uconv > /dev/null; then
    normalizer=(uconv -f utf8 -t utf8 -x Any-NFKC --callback skip)
  else
    echo "$0: Cannot find ICU uconv (http://site.icu-project.org/) ... falling back to iconv. Quality may suffer." 1>&2
    normalizer=(iconv -f utf8 -t utf8 -c)
  fi

  if [[ $# -eq 1 && $1 == "--batchline" ]]; then
    # The batch helper receives the whole command as a single
    # space-separated argument.
    perl "$(dirname -- "$0")/utf8-normalize-batch.pl" "${normalizer[*]}"
  else
    perl -e '$|++; while(<>){s/\r\n*/\n/g; print;}' \
      | "${normalizer[@]}" \
      | /usr/bin/perl -w -e '
        $|++;
        while (<>) {
          chomp;
          s/[\x00-\x1F]+/ /g;
          s/ +/ /g;
          s/^ //;
          s/ $//;
          print "$_\n";
        }'
  fi
}

main "$@"
|