blob: f7f131a0e7c4906d6c90614f5641c9c07fe4432d (
plain)
1
2
3
4
5
6
7
8
9
10
|
# THIS FILE GIVES THE LOCATIONS OF THE CORPORA USED
# name path aligned-corpus LM dev dev-refs test1 test1-eval.sh ...
/export/ws10smt/data
btec /export/ws10smt/data/btec/ split.zh-en.al lm/en.3gram.lm.gz devtest/devset1_2.zh devtest/devset1_2.lc.en* devtest/devset3.zh eval-devset3.sh
fbis /export/ws10smt/data/chinese-english.fbis corpus.zh-en.al
zhen /export/ws10smt/data/chinese-english corpus.zh-en.al
aren /export/ws10smt/data/arabic-english corpus.ar-en.al
uren /export/ws10smt/data/urdu-english corpus.ur-en.al lm/u2e.en.lm.gz dev/dev.ur dev/dev.en* devtest/devtest.ur eval-devtest.sh
nlfr /export/ws10smt/data/dutch-french corpus.nl-fr.al
|