summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x preprocess_no_lower 9
1 files changed, 9 insertions, 0 deletions
diff --git a/preprocess_no_lower b/preprocess_no_lower
new file mode 100755
index 0000000..3a4d358
--- /dev/null
+++ b/preprocess_no_lower
@@ -0,0 +1,9 @@
+#!/bin/bash
+#
+# Usage: preprocess_no_lower LANG
+#
+# Runs a fixed pipeline of helper programs located in the same directory as
+# this script:
+#
+#   no_non_printables | sed | htmlentities | normalize_punctuation
+#     | tokenizer-no-escape.perl
+#
+# The first stage inherits this script's stdin; the last stage's stdout is
+# this script's stdout.  LANG ($1) is handed to the tokenizer via -l and is
+# also used to name the per-stage stderr logs, which are created in the
+# current working directory:
+#
+#   htmlentities.LANG.err
+#   normalize-punctuation.LANG.err
+#   tokenizer.LANG.err
+
+# Physical (symlink-resolved) directory containing this script.  Done in a
+# command substitution so the caller's working directory is never changed, and
+# so a failed cd aborts instead of silently pointing P at the wrong place.
+# cd's stdout is discarded because it prints the target when CDPATH is used.
+P=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd -P) || exit 1
+
+# Deliberately NOT stored in a variable named LANG: LANG is the locale
+# environment variable, and assigning to it would change the locale seen by
+# every child process in the pipeline below.
+lang=${1:?usage: preprocess_no_lower LANG}
+
+# The sed stage replaces every run of four or more characters from the
+# bracket set with "...".
+# NOTE(review): inside a bracket expression the backslash is literal, so the
+# set is '-', ',', '\' and '.'; runs of backslashes are collapsed too.
+# Confirm that is intended before changing the pattern.
+"$P/no_non_printables" \
+  | sed "s|[-,\.]\{4,\}|...|g" \
+  | "$P/htmlentities" 2> "htmlentities.$lang.err" \
+  | "$P/normalize_punctuation" 2> "normalize-punctuation.$lang.err" \
+  | "$P/tokenizer-no-escape.perl" -a -b -threads 1 -l "$lang" 2> "tokenizer.$lang.err"
+