From 606750f2487ed294dcdadcd99638eb5de80d1a0c Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 11 Nov 2011 11:28:24 +0100
Subject: better hstreaming reporting, new hstreaming wrapper
---
dtrain/dtrain.cc | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
(limited to 'dtrain/dtrain.cc')
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 2fe7afd7..2d15f059 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -19,7 +19,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("scorer", po::value()->default_value("stupid_bleu"), "scoring: bleu, stupid_*, smooth_*, approx_*")
("stop_after", po::value()->default_value(0), "stop after X input sentences")
("print_weights", po::value(), "weights to print on each iteration")
- ("hstreaming", po::value()->zero_tokens(), "run in hadoop streaming mode")
+ ("hstreaming", po::value()->default_value("N/A"), "run in hadoop streaming mode, arg is a task id")
("learning_rate", po::value()->default_value(0.0005), "learning rate")
("gamma", po::value()->default_value(0), "gamma for SVM (0 for perceptron)")
("tmp", po::value()->default_value("/tmp"), "temp dir to use")
@@ -91,11 +91,14 @@ main(int argc, char** argv)
bool noup = false;
if (cfg.count("noup")) noup = true;
bool hstreaming = false;
+ string task_id;
if (cfg.count("hstreaming")) {
hstreaming = true;
quiet = true;
+ task_id = cfg["hstreaming"].as();
cerr.precision(17);
}
+ HSReporter rep(task_id);
bool keep_w = false;
if (cfg.count("keep_w")) keep_w = true;
@@ -384,16 +387,18 @@ main(int argc, char** argv)
++ii;
- if (hstreaming) cerr << "reporter:counter:dtrain,count,1" << endl;
+ if (hstreaming) rep.update_counter("Seen", 1u);
} // input loop
- if (hstreaming && t == 0) cerr << "reporter:counter:dtrain,|input|," << ii+1 << endl;
-
if (scorer_str == "approx_bleu") scorer->Reset();
if (t == 0) {
in_sz = ii; // remember size of input (# lines)
+ if (hstreaming) {
+ rep.update_counter("|Input|", ii+1);
+ rep.update_gcounter("|Input|", ii+1);
+ }
}
#ifndef DTRAIN_LOCAL
@@ -415,10 +420,6 @@ main(int argc, char** argv)
score_diff = score_avg;
model_diff = model_avg;
}
- if (hstreaming) {
- cerr << "reporter:counter:dtrain,score avg it " << t+1 << "," << score_avg << endl;
- cerr << "reporter:counter:dtrain,model avg it " << t+1 << "," << model_avg << endl;
- }
if (!quiet) {
cerr << _p5 << _p << "WEIGHTS" << endl;
@@ -435,6 +436,14 @@ main(int argc, char** argv)
cerr << " avg #up: ";
cerr << nup/(float)in_sz << endl;
}
+
+ if (hstreaming) {
+ rep.update_counter("Score avg #"+boost::lexical_cast(t+1), score_avg);
+ rep.update_counter("Model avg #"+boost::lexical_cast(t+1), model_avg);
+ rep.update_counter("Pairs avg #"+boost::lexical_cast(t+1), npairs/(weight_t)in_sz);
+ rep.update_counter("Updates avg #"+boost::lexical_cast(t+1), nup/(weight_t)in_sz);
+ }
+
pair remember;
remember.first = score_avg;
remember.second = model_avg;
--
cgit v1.2.3