summaryrefslogtreecommitdiff
path: root/gi
diff options
context:
space:
mode:
author philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-23 18:53:17 +0000
committer philblunsom@gmail.com <philblunsom@gmail.com@ec762483-ff6d-05da-a07a-a48fb63a330f> 2010-06-23 18:53:17 +0000
commit 7c26e270a555d524c4e6eebf572e115213ed2695 (patch)
tree 0cf1e01ec4d40e7e36a9cfcd00b6e01c30c279a2 /gi
parent ccbf1b2c984709e5bbbefe0ffb36e4b92c156989 (diff)
git-svn-id: https://ws10smt.googlecode.com/svn/trunk@15 ec762483-ff6d-05da-a07a-a48fb63a330f
Diffstat (limited to 'gi')
-rw-r--r-- gi/pyp-topics/scripts/run.sh 13
1 files changed, 13 insertions, 0 deletions
diff --git a/gi/pyp-topics/scripts/run.sh b/gi/pyp-topics/scripts/run.sh
new file mode 100644
index 00000000..19e625b1
--- /dev/null
+++ b/gi/pyp-topics/scripts/run.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+
+./simple-extract-context.sh ~/workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al 1 | ~/workspace/pyp-topics/scripts/contexts2documents.py > split.zh-en.data
+
+~/workspace/pyp-topics/bin/pyp-topics-train -d split.zh-en.data -t 50 -s 100 -o split.zh-en.documents.gz -w split.zh-en.topics.gz
+gunzip split.zh-en.documents.gz
+
+~/workspace/cdec/extools/extractor -i ../jhuws2010/data/btec/split.zh-en.al -S 1 -c 500000 -L 12 --base_phrase_spans | ~/workspace/pyp-topics/scripts/spans2labels.py split.zh-en.phrases split.zh-en.contexts split.zh-en.documents > corpus.zh-en.labelled_spans
+
+paste -d " " ~/workspace/clsp2010/jhuws2010/data/btec/split.zh-en.al corpus.labelled_spans > split.zh-en.labelled_spans
+
+./simple-extract.sh ~/workspace/clsp2010/scratch/split.zh-en.labelled_spans