diff options
author | Patrick Simianer <p@simianer.de> | 2011-10-14 17:34:58 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-10-14 17:34:58 +0200 |
commit | 68f158b11df9f4072699fe6a4c8022ea54102b28 (patch) | |
tree | 7806a1b660921eda128cc049c0f6b843e0915dbf | |
parent | 85c8b5036ad69f468017034f1e3b7959f0cb221d (diff) |
hstreaming, readme
-rw-r--r-- | dtrain/README.md | 8 | ||||
-rwxr-xr-x | dtrain/hstreaming/hadoop-streaming-job.sh | 6 |
2 files changed, 9 insertions, 5 deletions
```diff
diff --git a/dtrain/README.md b/dtrain/README.md
index 4ed31d43..1ee3823e 100644
--- a/dtrain/README.md
+++ b/dtrain/README.md
@@ -51,11 +51,11 @@ Data
 nc-v6.de-en peg
 nc-v6.de-en.loo peg
 nc-v6.de-en.giza.loo peg
-nc-v6.de-en.symgiza.loo pe
-nv-v6.de-en.cs pe
-nc-v6.de-en.cs.loo pe
+nc-v6.de-en.symgiza.loo peg
+nv-v6.de-en.cs peg
+nc-v6.de-en.cs.loo peg
 --
-ep-v6.de-en.cs p
+ep-v6.de-en.cs pe
 ep-v6.de-en.cs.loo p
 
 p: prep, e: extract, g: grammar, d: dtrain
diff --git a/dtrain/hstreaming/hadoop-streaming-job.sh b/dtrain/hstreaming/hadoop-streaming-job.sh
index f51b6024..8eb9ac0e 100755
--- a/dtrain/hstreaming/hadoop-streaming-job.sh
+++ b/dtrain/hstreaming/hadoop-streaming-job.sh
@@ -1,5 +1,8 @@
 #!/bin/bash
 
+ID=
+EXP=test
+
 HADOOP_HOME=/usr/lib/hadoop-0.20
 JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar
 HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR"
@@ -18,5 +21,6 @@ $HSTREAMING \
   -file cdec.ini \
   -file nc-wmt11.en.srilm.3.gz \
   -jobconf mapred.reduce.tasks=1 \
-  -jobconf mapred.max.map.failures.percent=100
+  -jobconf mapred.max.map.failures.percent=100 \
+  -jobconf mapred.job.name="dtrain $ID $EXP"
 
```