diff options
author | Patrick Simianer <p@simianer.de> | 2011-09-23 20:53:15 +0200 |
---|---|---|
committer | Patrick Simianer <p@simianer.de> | 2011-09-23 20:53:15 +0200 |
commit | 9bde56ed23b4b97f8193f9f8f582f18086ff17c1 (patch) | |
tree | 83bd5687f2069405537f7f8fbdfbe208a634ca54 /dtrain/job | |
parent | 4433886ac335e6db7ded081b5ef673490ee27718 (diff) |
begin refactoring
Diffstat (limited to 'dtrain/job')
-rwxr-xr-x | dtrain/job/avgweights.rb | 30 | ||||
-rw-r--r-- | dtrain/job/cdec.ini | 8 | ||||
-rw-r--r-- | dtrain/job/dtrain.ini | 10 | ||||
-rwxr-xr-x | dtrain/job/dtrain.sh | 6 | ||||
-rwxr-xr-x | dtrain/job/hadoop-streaming-job.sh | 23 |
5 files changed, 0 insertions, 77 deletions
diff --git a/dtrain/job/avgweights.rb b/dtrain/job/avgweights.rb deleted file mode 100755 index e635aab4..00000000 --- a/dtrain/job/avgweights.rb +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env ruby1.9.1 - - -STDIN.set_encoding 'utf-8' - -#shard_count_key = "__SHARD_COUNT__" - -w = {} -#c = {} -w.default = 0 -#c.default = 0 -while line = STDIN.gets - key, val = line.split /\t/ - w[key] += val.to_f - #c[key] += 1.0 -end - -#shard_count = w["__SHARD_COUNT__"] - -num_map = 104.0 - -w.each_key { |k| - #if k == shard_count_key then next end - #if k == "__bias" then next end - puts "#{k}\t#{w[k]/num_map}" - #/c[k]}" #{w[k]/shard_count}" -} - -#puts "#{shard_count_key}\t#{w[shard_count_key]}" - diff --git a/dtrain/job/cdec.ini b/dtrain/job/cdec.ini deleted file mode 100644 index 0d32f0b7..00000000 --- a/dtrain/job/cdec.ini +++ /dev/null @@ -1,8 +0,0 @@ -formalism=scfg -add_pass_through_rules=true -feature_function=WordPenalty -cubepruning_pop_limit=30 -feature_function=KLanguageModel nc-wmt11.en.srilm.3.gz -feature_function=RuleIdentityFeatures -scfg_max_span_limit=15 - diff --git a/dtrain/job/dtrain.ini b/dtrain/job/dtrain.ini deleted file mode 100644 index 079d7d69..00000000 --- a/dtrain/job/dtrain.ini +++ /dev/null @@ -1,10 +0,0 @@ -decoder_config=cdec.ini -kbest=100 -ngrams=4 -epochs=10 -input=- -scorer=stupid_bleu -output=- -#stop_after=100 -#wprint=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough - diff --git a/dtrain/job/dtrain.sh b/dtrain/job/dtrain.sh deleted file mode 100755 index 75ec29ea..00000000 --- a/dtrain/job/dtrain.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh - -./dtrain -q -c dtrain.ini - -exit 0 - diff --git a/dtrain/job/hadoop-streaming-job.sh b/dtrain/job/hadoop-streaming-job.sh deleted file mode 100755 index 2cf3f50a..00000000 --- a/dtrain/job/hadoop-streaming-job.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -HADOOP_HOME=/usr/lib/hadoop-0.20 -JAR=contrib/streaming/hadoop-streaming-0.20.2-cdh3u1.jar -HSTREAMING="$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/$JAR" - -IN=in/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain.1400m -OUT=out/nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain-weights-1400m - -$HSTREAMING \ - -mapper "dtrain.sh" \ - -reducer "avgweights.rb" \ - -input $IN \ - -output $OUT \ - -file avgweights.rb \ - -file dtrain.sh \ - -file dtrain \ - -file dtrain.ini \ - -file cdec.ini \ - -file nc-wmt11.en.srilm.3.gz \ - -jobconf mapred.reduce.tasks=1 \ - -jobconf mapred.max.map.failures.percent=100 - |