summary | refs | log | tree | commit | diff
path: root/dtrain
diff options
context:
space:
mode:
author Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-04-23 21:30:26 +0200
committer Patrick Simianer <simianer@cl.uni-heidelberg.de> 2012-04-23 21:30:26 +0200
commit d3aa71d1095a5c45c1d3ca3155259e5fe0b58df2 (patch)
tree 7752cecb6fad0102e4fabd1c9e935bee955d9566 /dtrain
parent e91553ae70907e243a554e4a549c53df57b78478 (diff)
NEXT
Diffstat (limited to 'dtrain')
-rw-r--r-- dtrain/NEXT 6
-rw-r--r-- dtrain/README.md 2
-rw-r--r-- dtrain/dtrain.h 2
-rwxr-xr-x dtrain/hstreaming/avg.rb 5
-rwxr-xr-x dtrain/hstreaming/lplp.rb 4
5 files changed, 13 insertions, 6 deletions
diff --git a/dtrain/NEXT b/dtrain/NEXT
new file mode 100644
index 00000000..24939cf3
--- /dev/null
+++ b/dtrain/NEXT
@@ -0,0 +1,6 @@
+cuda vecs?
+target side rule ngrams
+decoder meta-parameters testing
+cdyer -> sa-extract -> loo?
+reranking while sgd
+
diff --git a/dtrain/README.md b/dtrain/README.md
index e28bebe7..f4e1abed 100644
--- a/dtrain/README.md
+++ b/dtrain/README.md
@@ -1,4 +1,4 @@
-This is a really fast (parallelizable) tuning method for cdec as used here:
+This is a simple (but parallelizable) tuning method for cdec, as used here:
"Joint Feature Selection in Distributed Stochastic
Learning for Large-Scale Discriminative Training in
SMT" Simianer, Riezler, Dyer
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index 59ceb6f6..783aa179 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -13,7 +13,7 @@
#include "filelib.h"
-//#define DTRAIN_LOCAL
+#define DTRAIN_LOCAL
#define DTRAIN_DOTS 10 // when to display a '.'
#define DTRAIN_GRAMMAR_DELIM "########EOS########"
diff --git a/dtrain/hstreaming/avg.rb b/dtrain/hstreaming/avg.rb
index 91d4e29a..5deb62e4 100755
--- a/dtrain/hstreaming/avg.rb
+++ b/dtrain/hstreaming/avg.rb
@@ -22,10 +22,11 @@ else
end
w.each_key { |k|
if k == shard_count_key
- puts "# shard count: #{shard_count.to_i}"
+ #puts "# shard count: #{shard_count.to_i}"
+ next
else
puts "#{k}\t#{w[k]/shard_count}"
- puts "# #{c[k]}"
+ #puts "# #{c[k]}"
end
}
diff --git a/dtrain/hstreaming/lplp.rb b/dtrain/hstreaming/lplp.rb
index 40409bbd..57353adb 100755
--- a/dtrain/hstreaming/lplp.rb
+++ b/dtrain/hstreaming/lplp.rb
@@ -82,7 +82,7 @@ def _test()
cut(w, method(:mean), 1, 2.05)
exit
end
-_test()
+#_test()
# actually do something
def usage()
@@ -107,7 +107,7 @@ STDOUT.set_encoding 'utf-8'
w = {}
shard_count = 0
while line = STDIN.gets
- key, val = line.split /\t/
+ key, val = line.split /\s+/
if key == shard_count_key
shard_count += 1
next