diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2013-12-11 16:56:56 +0100 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2013-12-11 16:56:56 +0100 |
commit | 941da973ffd77b5b0accb64cb4ffe7f593dcd5f8 (patch) | |
tree | 881640442c5171175945204c6626a08fda57a429 /preprocess | |
parent | 36e78054419ccf38fb3049e32120b5704de4ba85 (diff) |
use moses tokenizer but without escaping of 'special' chars
Diffstat (limited to 'preprocess')
-rwxr-xr-x | preprocess | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
@@ -1,5 +1,5 @@
 #!/bin/zsh
 
 LANG=$1
 
-/toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
+/toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-esc.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
```