diff options
author | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-04-23 21:30:26 +0200 |
---|---|---|
committer | Patrick Simianer <simianer@cl.uni-heidelberg.de> | 2012-04-23 21:30:26 +0200 |
commit | d3aa71d1095a5c45c1d3ca3155259e5fe0b58df2 (patch) | |
tree | 7752cecb6fad0102e4fabd1c9e935bee955d9566 /dtrain | |
parent | e91553ae70907e243a554e4a549c53df57b78478 (diff) |
NEXT
Diffstat (limited to 'dtrain')
-rw-r--r-- | dtrain/NEXT | 6 | ||||
-rw-r--r-- | dtrain/README.md | 2 | ||||
-rw-r--r-- | dtrain/dtrain.h | 2 | ||||
-rwxr-xr-x | dtrain/hstreaming/avg.rb | 5 | ||||
-rwxr-xr-x | dtrain/hstreaming/lplp.rb | 4 |
5 files changed, 13 insertions, 6 deletions
diff --git a/dtrain/NEXT b/dtrain/NEXT new file mode 100644 index 00000000..24939cf3 --- /dev/null +++ b/dtrain/NEXT @@ -0,0 +1,6 @@ +cuda vecs? +target side rule ngrams +decoder meta-parameters testing +cdyer -> sa-extract -> loo? +reranking while sgd + diff --git a/dtrain/README.md b/dtrain/README.md index e28bebe7..f4e1abed 100644 --- a/dtrain/README.md +++ b/dtrain/README.md @@ -1,4 +1,4 @@ -This is a really fast (parallelizable) tuning method for cdec as used here: +This is a simple (but parallelizable) tuning method for cdec, as used here: "Joint Feature Selection in Distributed Stochastic Learning for Large-Scale Discriminative Training in SMT" Simianer, Riezler, Dyer diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h index 59ceb6f6..783aa179 100644 --- a/dtrain/dtrain.h +++ b/dtrain/dtrain.h @@ -13,7 +13,7 @@ #include "filelib.h" -//#define DTRAIN_LOCAL +#define DTRAIN_LOCAL #define DTRAIN_DOTS 10 // when to display a '.' #define DTRAIN_GRAMMAR_DELIM "########EOS########" diff --git a/dtrain/hstreaming/avg.rb b/dtrain/hstreaming/avg.rb index 91d4e29a..5deb62e4 100755 --- a/dtrain/hstreaming/avg.rb +++ b/dtrain/hstreaming/avg.rb @@ -22,10 +22,11 @@ else end w.each_key { |k| if k == shard_count_key - puts "# shard count: #{shard_count.to_i}" + #puts "# shard count: #{shard_count.to_i}" + next else puts "#{k}\t#{w[k]/shard_count}" - puts "# #{c[k]}" + #puts "# #{c[k]}" end } diff --git a/dtrain/hstreaming/lplp.rb b/dtrain/hstreaming/lplp.rb index 40409bbd..57353adb 100755 --- a/dtrain/hstreaming/lplp.rb +++ b/dtrain/hstreaming/lplp.rb @@ -82,7 +82,7 @@ def _test() cut(w, method(:mean), 1, 2.05) exit end -_test() +#_test() # actually do something def usage() @@ -107,7 +107,7 @@ STDOUT.set_encoding 'utf-8' w = {} shard_count = 0 while line = STDIN.gets - key, val = line.split /\t/ + key, val = line.split /\s+/ if key == shard_count_key shard_count += 1 next |