#!/bin/sh
#
# run.sh -- drive the BTEC zh-en pyp-topics pipeline end to end.
#
# Provenance: added as gi/pyp-topics/scripts/run.sh in commit cf2f68ec
# (git-svn-id: https://ws10smt.googlecode.com/svn/trunk@15).
#
# Steps, in order:
#   1. simple-extract-context.sh | contexts2documents.py -> split.zh-en.data
#   2. pyp-topics-train                                   -> split.zh-en.documents
#                                                            split.zh-en.topics.gz
#   3. extractor | spans2labels.py                        -> corpus.zh-en.labelled_spans
#   4. paste corpus + labelled spans                      -> split.zh-en.labelled_spans
#   5. simple-extract.sh on the pasted file
#
# Usage: run from the directory that contains simple-extract-context.sh and
# simple-extract.sh; every intermediate file is written to that directory.
# NOTE(review): the original hard-coded ~/workspace/clsp2010/scratch for the
# final step, so that is presumably the intended working directory -- confirm.

# Abort on the first failing command or unset variable so later stages never
# run on missing or stale intermediates.  POSIX sh has no portable pipefail,
# so only the last stage of each pipeline is checked.
set -eu

die() {
  printf '%s\n' "run.sh: $*" >&2
  exit 1
}

workspace="$HOME/workspace"
pyp_topics="$workspace/pyp-topics"

# Word-aligned corpus.  One variable is used everywhere: step 4 pastes this
# file line-by-line against the output of step 3, so step 3 must read exactly
# the same file.  (The original spelled it ../jhuws2010/... for the extractor,
# which only matches when run from ~/workspace/clsp2010/scratch.)
corpus="$workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al"

# Output of step 3 and input of step 4.  The original wrote
# corpus.zh-en.labelled_spans but read corpus.labelled_spans.
labelled_spans="corpus.zh-en.labelled_spans"

[ -r "$corpus" ] || die "cannot read aligned corpus: $corpus"

# 1. Extract contexts from the corpus and convert them to the document format.
./simple-extract-context.sh "$corpus" 1 \
  | "$pyp_topics/scripts/contexts2documents.py" > split.zh-en.data

# 2. Train on the documents; unpack the document output for step 3.
"$pyp_topics/bin/pyp-topics-train" -d split.zh-en.data -t 50 -s 100 \
  -o split.zh-en.documents.gz -w split.zh-en.topics.gz
# -f: overwrite split.zh-en.documents left by a previous run instead of
# refusing (which would abort the script under set -e).
gunzip -f split.zh-en.documents.gz

# 3. Extract phrase spans and label them.
# NOTE(review): split.zh-en.phrases and split.zh-en.contexts are not created
# by any command in this script; presumably they are left behind by
# simple-extract-context.sh -- verify.
"$workspace/cdec/extools/extractor" -i "$corpus" -S 1 -c 500000 -L 12 \
  --base_phrase_spans \
  | "$pyp_topics/scripts/spans2labels.py" \
      split.zh-en.phrases split.zh-en.contexts split.zh-en.documents \
      > "$labelled_spans"

# 4. Put each corpus line next to its labelled spans, separated by one space.
paste -d " " "$corpus" "$labelled_spans" > split.zh-en.labelled_spans

# 5. Final extraction on the file just written.  Passed as an absolute path,
# as in the original, but derived from the current directory so it always
# names the fresh file.
./simple-extract.sh "$PWD/split.zh-en.labelled_spans"