From 3bbbd92fc7b1f9d0d61f8573fee0d0b2f40960d3 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Mon, 17 Mar 2014 13:13:09 +0100
Subject: a lot of ... and --- cause moses' compound splitter to hang

---
 preprocess | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'preprocess')

diff --git a/preprocess b/preprocess
index f3c3d10..69eaa54 100755
--- a/preprocess
+++ b/preprocess
@@ -1,5 +1,5 @@
 #!/bin/bash
 
 LANG=$1
 
-/toolbox/scripts/no_non_printables | /toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
+/toolbox/scripts/no_non_printables | sed "s|[-,\.]\{4,\}|...|" | /toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
-- 
cgit v1.2.3