summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Simianer <p@simianer.de> 2013-12-12 19:41:27 +0100
committer: Patrick Simianer <p@simianer.de> 2013-12-12 19:41:27 +0100
commit: b34ba08c04e4c10aca1879c484ed27119551aa09 (patch)
tree: da861e54d5e5e33e3e6468075125161edce30a06
parent: 820c06de59307ad06557a603ffee08679fcf3e43 (diff)
parent: 85952929c7f4451a65597b589deade7b6c63403a (diff)
Merge branch 'master' of github.com:pks/scripts
-rwxr-xr-x preprocess 2
-rwxr-xr-x preprocess_nolow 5
2 files changed, 1 insertion, 6 deletions
diff --git a/preprocess b/preprocess
index 716255d..bc6b5d2 100755
--- a/preprocess
+++ b/preprocess
@@ -1,5 +1,5 @@
#!/bin/zsh
LANG=$1
-~/scripts/htmlentities 2>htmlentities.$LANG.err | ~/scripts/normalize-punctuation 2>normalize-punctuation.$LANG.err | ~/moses/scripts/tokenizer/tokenizer.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | ~/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
+/toolbox/scripts/htmlentities 2>htmlentities.$LANG.err | /toolbox/scripts/normalize_punctuation 2>normalize-punctuation.$LANG.err | /toolbox/moses/scripts/tokenizer/tokenizer.no-escape.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err | /toolbox/moses/scripts/tokenizer/lowercase.perl 2>lowercase.$LANG.err
diff --git a/preprocess_nolow b/preprocess_nolow
deleted file mode 100755
index fc466b6..0000000
--- a/preprocess_nolow
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/zsh
-
-LANG=$1
-~/scripts/htmlentities 2>htmlentities.$LANG.err | ~/scripts/normalize-punctuation 2>normalize-punctuation.$LANG.err | ~/moses/scripts/tokenizer/tokenizer.perl -a -b -threads 1 -l $LANG 2>tokenizer.$LANG.err
-