From 941da973ffd77b5b0accb64cb4ffe7f593dcd5f8 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Wed, 11 Dec 2013 16:56:56 +0100
Subject: use moses tokenizer but without escaping of 'special' chars

---
 preprocess | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'preprocess')

diff --git a/preprocess b/preprocess
index 9273cfb..14eb73a 100755
--- a/preprocess
+++ b/preprocess
@@ -1,5 +1,5 @@
 #!/bin/zsh
 LANG=$1
-/toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
+/toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-esc.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
--
cgit v1.2.3