summaryrefslogtreecommitdiff
path: root/preprocess
blob: a46b0a821916e21cbc7c94e3e278e709cb99f828 (plain)
1
2
3
4
5
6
7
8
9
#!/bin/bash
#
# Run the preprocessing pipeline over stdin and write the result to stdout.
#
# Usage: preprocess LANGUAGE < input > output
#
# Arguments:
#   $1 - language code; passed to tokenizer-no-escape.perl via -l and used
#        in the names of the per-stage error logs.
#
# Every helper is looked up in the directory that contains this script, so
# the script can be invoked from any working directory. The pipeline is:
#
#   no-non-printables | sed | htmlentities | normalize-punctuation
#     | tokenizer-no-escape.perl | lowercase.perl
#
# Outputs:
#   stdout - the output of the final stage (lowercase.perl).
#   In the current working directory, the stderr of four stages is captured
#   in htmlentities.LANGUAGE.err, normalize-punctuation.LANGUAGE.err,
#   tokenizer.LANGUAGE.err and lowercase.LANGUAGE.err.
#
# Exit status: 2 when LANGUAGE is missing, 1 when the script directory
# cannot be resolved, otherwise the exit status of the last pipeline stage.

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s LANGUAGE < input > output\n' "${0##*/}" >&2
  exit 2
fi

# Resolve the physical directory of this script. The subshell keeps the
# caller's working directory untouched; cd's output is discarded because it
# prints the target directory when CDPATH is in effect.
script_dir=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd -P) || exit 1

# Deliberately NOT named LANG: LANG is the POSIX locale variable, and
# overwriting it with a bare language code would hand an invalid locale to
# every program in the pipeline.
lang=$1

# The sed stage replaces any run of four or more characters from the bracket
# set (hyphen, comma, dot; the backslash is literal inside a bracket
# expression and therefore also part of the set) with a plain "...".
"$script_dir/no-non-printables" \
  | sed 's|[-,\.]\{4,\}|...|g' \
  | "$script_dir/htmlentities" 2> "htmlentities.${lang}.err" \
  | "$script_dir/normalize-punctuation" 2> "normalize-punctuation.${lang}.err" \
  | "$script_dir/tokenizer-no-escape.perl" -a -b -threads 1 -l "$lang" 2> "tokenizer.${lang}.err" \
  | "$script_dir/lowercase.perl" 2> "lowercase.${lang}.err"