summary refs log tree commit diff
path: root/corpus/tokenize-anything.sh
diff options
context:
space:
mode:
Diffstat (limited to 'corpus/tokenize-anything.sh')
-rwxr-xr-x corpus/tokenize-anything.sh 6
1 files changed, 3 insertions, 3 deletions
diff --git a/corpus/tokenize-anything.sh b/corpus/tokenize-anything.sh
index 52739e81..a20a022f 100755
--- a/corpus/tokenize-anything.sh
+++ b/corpus/tokenize-anything.sh
@@ -4,12 +4,12 @@ ROOTDIR=`dirname $0`
SUPPORT=$ROOTDIR/support
if [[ $# == 1 && $1 == '-u' ]] ; then
- NORMCMD=cat
+ NORMARGS="--batchline"
else
- NORMCMD=$SUPPORT/utf8-normalize.sh
+ NORMARGS=""
fi
-$NORMCMD |
+$SUPPORT/utf8-normalize.sh $NORMARGS |
$SUPPORT/quote-norm.pl |
$SUPPORT/tokenizer.pl |
sed -u -e 's/ al - / al-/g' |