summary | refs | log | tree | commit | diff
path: root/corpus/tokenize-anything.sh
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2013-03-15 11:39:18 +0100
committer: Patrick Simianer <p@simianer.de> 2013-03-15 11:39:18 +0100
commit: 3d8aa307d233f58cfe9ddcc616a8297551a738e3 (patch)
tree: b9556fe62c7ac0d7b48d92481acdd06a546b1dc2 /corpus/tokenize-anything.sh
parent: cf67d34738e1487f75739dc1e027b1864a06513b (diff)
parent: 9f1d72fa4dc231eb8cdb737becfc10452b5daef4 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'corpus/tokenize-anything.sh')
-rwxr-xr-x corpus/tokenize-anything.sh 2
1 file changed, 1 insertion, 1 deletion
diff --git a/corpus/tokenize-anything.sh b/corpus/tokenize-anything.sh
index 1a24193d..028992cf 100755
--- a/corpus/tokenize-anything.sh
+++ b/corpus/tokenize-anything.sh
@@ -9,5 +9,5 @@ $SUPPORT/utf8-normalize.sh |
sed -e 's/ al - / al-/g' |
$SUPPORT/fix-contract.pl |
sed -e 's/^ //' | sed -e 's/ $//' |
- perl -e 'while(<>){s/(\d+)(\.+)$/$1 ./;print;}'
+ perl -e 'while(<>){s/(\d+)(\.+)$/$1 ./; s/(\d+)(\.+) \|\|\|/$1 . |||/; print;}'