diff options
-rwxr-xr-x | preprocess_no_lower | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/preprocess_no_lower b/preprocess_no_lower
new file mode 100755
index 0000000..3a4d358
--- /dev/null
+++ b/preprocess_no_lower
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+pushd `dirname $0` > /dev/null
+P=`pwd -P`
+popd > /dev/null
+
+LANG=$1
+$P/no_non_printables | sed "s|[-,\.]\{4,\}|...|g" | $P/htmlentities 2>htmlentities.$LANG.err | $P/normalize_punctuation 2>normalize-punctuation.$LANG.err | $P/tokenizer-no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err
+