summary | refs | log | tree | commit | diff
path: root/dtrain
diff options
context:
space:
mode:
author Patrick Simianer <p@simianer.de> 2011-10-14 17:34:58 +0200
committer Patrick Simianer <p@simianer.de> 2011-10-14 17:34:58 +0200
commit ce3b4db94d40c111ede321ac6de2bb061a81c4af (patch)
tree 2302941c270b4a5b7ce33d422efa43c181462fed /dtrain
parent 74b9ca617f1ec1e3f3a9b96de5fab17859392f3b (diff)
hstreaming, readme
Diffstat (limited to 'dtrain')
-rw-r--r-- dtrain/README.md 8
-rwxr-xr-x dtrain/hstreaming/hadoop-streaming-job.sh 6
2 files changed, 9 insertions, 5 deletions
diff --git a/dtrain/README.md b/dtrain/README.md
index 4ed31d43..1ee3823e 100644
--- a/dtrain/README.md
+++ b/dtrain/README.md
@@ -51,11 +51,11 @@ Data
nc-v6.de-en peg
nc-v6.de-en.loo peg
nc-v6.de-en.giza.loo peg
-nc-v6.de-en.symgiza.loo pe
-nv-v6.de-en.cs pe
-nc-v6.de-en.cs.loo pe
+nc-v6.de-en.symgiza.loo peg
+nv-v6.de-en.cs peg
+nc-v6.de-en.cs.loo peg
--
-ep-v6.de-en.cs p
+ep-v6.de-en.cs pe
ep-v6.de-en.cs.loo p
p: prep, e: extract, g: grammar, d: dtrain
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index f51b6024..8eb9ac0e 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -1,5 +1,8 @@
#!/bin/bash
+ID=
+EXP=test
+
HADOOP_HOME=/usr/lib/hadoop-0.20
JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
@@ -18,5 +21,6 @@ $HSTREAMING \
-file cdec.ini \
-file nc-wmt11.en.srilm.3.gz \
-jobconf mapred.reduce.tasks=1 \
- -jobconf mapred.max.map.failures.percent=100
+ -jobconf mapred.max.map.failures.percent=100 \
+ -jobconf mapred.job.name="dtrain $ID $EXP"