# Prepare the parallel de-en corpus named by $NAME: preprocess both sides,
# drop empty lines, build a language model on the English side, word-align
# the bitext in both directions, symmetrize the alignments and run the
# grammar extractor.
#
# Inputs:  $DATA/$NAME.de.raw, $DATA/$NAME.en.raw
# Outputs (relative to the current directory):
#   $NAME.de.nof, $NAME.en.nof, $NAME.de, $NAME.en, $NAME,
#   lm/$NAME.arpa.4, lm/$NAME.ken.4, a/forward, a/backward, a/gdfa,
#   g/ (passed to --grammars), $NAME.de.sgm

# Stop at the first failing step (or unset variable) so that later steps never
# run on missing or partial intermediate files.
set -eu

SCRIPTS=~/src/scripts/
CDEC=~/src/cdec-dtrain/
DATA=../data/
NAME=news-commentary

# --- Preprocessing ----------------------------------------------------------
# Disabled variant: preprocess German into a ".nocs" file that is then fed to
# the Moses compound splitter (see the other commented commands below).
# NOTE: $MOSES is not defined in this script; set it before re-enabling.
#$SCRIPTS/preprocess de < $DATA/$NAME.de.raw > $NAME.de.nof.nocs
"$SCRIPTS/preprocess" de < "$DATA/$NAME.de.raw" > "$NAME.de.nof"
"$SCRIPTS/preprocess" en < "$DATA/$NAME.en.raw" > "$NAME.en.nof"
#$MOSES/scripts/generic/compound-splitter.perl -train -corpus $NAME.de.nof.nocs -model cs_model.de 2>compound-splitter-train.de.err
#$MOSES/scripts/generic/compound-splitter.perl -model cs_model.de < $NAME.de.nof.nocs > $NAME.de.nof

# Takes the two ".nof" files as input and writes $NAME.de / $NAME.en.
"$SCRIPTS/no_empty" "$NAME.de.nof" "$NAME.en.nof" "$NAME.de" "$NAME.en"

# Combine both sides into the single bitext file used by the aligner and the
# extractor.
"$CDEC/corpus/paste-files.pl" "$NAME.de" "$NAME.en" > "$NAME"

# --- Language model (English side) -------------------------------------------
# mkdir -p so a re-run with an existing lm/ does not fail; the subshell keeps
# the directory change local to this step.
mkdir -p lm
(
  cd lm
  "$CDEC/klm/lm/builder/lmplz" -S 80% -T /tmp -o 4 \
    < "../$NAME.en" > "$NAME.arpa.4" 2> lmplz.err
  "$CDEC/klm/lm/build_binary" "$NAME.arpa.4" "$NAME.ken.4" 2> build_binary.err
)

# --- Word alignment ------------------------------------------------------------
mkdir -p a
(
  cd a
  "$CDEC/word-aligner/fast_align" -d -v -o -i "../$NAME" \
    > forward 2> forward.err
  # Same settings plus -r for the second alignment direction.
  "$CDEC/word-aligner/fast_align" -d -v -o -r -i "../$NAME" \
    > backward 2> backward.err
  # Merge both directions with the grow-diag-final-and heuristic.
  "$CDEC/utils/atools" -i forward -j backward -c grow-diag-final-and > gdfa
)

# --- Grammar extraction --------------------------------------------------------
# Runs from the top-level directory: a/gdfa, the bitext and the German input
# are all addressed relative to it. (Previously the script was still inside
# a/ at this point, so none of these paths resolved.)
"$CDEC/extractor/run_extractor" -b "$NAME" -a a/gdfa --leave_one_out \
  --grammars g/ < "$NAME.de" > "$NAME.de.sgm"