diff options
author | Patrick Simianer <p@simianer.de> | 2014-03-17 13:13:09 +0100 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2014-03-17 13:13:09 +0100 |
commit | 3bbbd92fc7b1f9d0d61f8573fee0d0b2f40960d3 (patch) | |
tree | 717a12c52c2409ba7d540738bd70beeedad1930a | |
parent | 4753aee16960de5b9c3da926dde18b332b34edf4 (diff) |
Long runs of "..." and "---" cause the Moses compound splitter to hang
-rwxr-xr-x | preprocess | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
@@ -1,5 +1,5 @@
 #!/bin/bash
 LANG=$1
-/toolbox/scripts/no_non_printables | /toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
+/toolbox/scripts/no_non_printables | sed "s|[-,\.]\{4,\}|...|" | /toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
```