-rw-r--r--  dtrain/dtrain.cc                89
-rw-r--r--  dtrain/dtrain.h                  9
-rw-r--r--  dtrain/test/example/dtrain.ini  12
3 files changed, 48 insertions(+), 62 deletions(-)
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index 05c3728d..27315358 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -7,21 +7,21 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
po::options_description ini("Configuration File Options");
ini.add_options()
("input", po::value<string>()->default_value("-"), "input file")
- ("output", po::value<string>()->default_value("-"), "output weights file")
+ ("output", po::value<string>()->default_value("-"), "output weights file, '-' for STDOUT")
("input_weights", po::value<string>(), "input weights file (e.g. from previous iteration)")
("decoder_config", po::value<string>(), "configuration file for cdec")
- ("k", po::value<unsigned>()->default_value(100), "size of kbest or sample from forest")
- ("sample_from", po::value<string>()->default_value("kbest"), "where to get translations from")
- ("filter", po::value<string>()->default_value("unique"), "filter kbest list")
- ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, rand")
- ("N", po::value<unsigned>()->default_value(3), "N for Ngrams")
- ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T")
- ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring metric")
+ ("sample_from", po::value<string>()->default_value("kbest"), "where to sample translations from: kbest, forest")
+ ("k", po::value<unsigned>()->default_value(100), "how many translations to sample")
+ ("filter", po::value<string>()->default_value("unique"), "filter kbest list: no, unique")
+ ("pair_sampling", po::value<string>()->default_value("all"), "how to sample pairs: all, rand, 108010")
+ ("N", po::value<unsigned>()->default_value(3), "N for Ngrams (BLEU)")
+ ("epochs", po::value<unsigned>()->default_value(2), "# of iterations T (per shard)")
+ ("scorer", po::value<string>()->default_value("stupid_bleu"), "scoring: bleu, stupid_*, smooth_*, approx_*")
("stop_after", po::value<unsigned>()->default_value(0), "stop after X input sentences")
("print_weights", po::value<string>(), "weights to print on each iteration")
("hstreaming", po::value<bool>()->zero_tokens(), "run in hadoop streaming mode")
- ("learning_rate", po::value<weight_t>()->default_value(0.0005), "learning rate")
- ("gamma", po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)")
+ ("learning_rate", po::value<weight_t>()->default_value(0.0005), "learning rate")
+ ("gamma", po::value<weight_t>()->default_value(0), "gamma for SVM (0 for perceptron)")
("tmp", po::value<string>()->default_value("/tmp"), "temp dir to use")
("select_weights", po::value<string>()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
("noup", po::value<bool>()->zero_tokens(), "do not update weights");
@@ -142,8 +142,6 @@ main(int argc, char** argv)
// meta params for perceptron, SVM
weight_t eta = cfg["learning_rate"].as<weight_t>();
weight_t gamma = cfg["gamma"].as<weight_t>();
- WordID __bias = FD::Convert("__bias");
- lambdas.add_value(__bias, 0);
string output_fn = cfg["output"].as<string>();
// input
@@ -158,7 +156,7 @@ main(int argc, char** argv)
ogzstream grammar_buf_out;
grammar_buf_out.open(grammar_buf_fn.c_str());
- unsigned in_sz = 999999999; // input index, input size
+ unsigned in_sz = UINT_MAX; // input index, input size
vector<pair<score_t, score_t> > all_scores;
score_t max_score = 0.;
unsigned best_it = 0;
@@ -286,23 +284,18 @@ main(int argc, char** argv)
// get (scored) samples
vector<ScoredHyp>* samples = observer->GetSamples();
- // FIXME
- /*if (verbose) {
- cout << "[ref: '";
- if (t > 0) cout << ref_ids_buf[ii]; <---
- else cout << ref_ids;
- cout << endl;
- cout << _p5 << _np << "1best: " << "'" << (*samples)[0].w << "'" << endl;
- cout << "SCORE=" << (*samples)[0].score << ",model="<< (*samples)[0].model << endl;
- cout << "F{" << (*samples)[0].f << "} ]" << endl << endl;
- }*/
- /*cout << lambdas.get(FD::Convert("PhraseModel_0")) << endl;
- cout << (*samples)[0].model << endl;
- cout << "1best: ";
- for (unsigned u = 0; u < (*samples)[0].w.size(); u++) cout << TD::Convert((*samples)[0].w[u]) << " ";
- cout << endl;
- cout << (*samples)[0].f << endl;
- cout << "___" << endl;*/
+    if (verbose) {
+      cerr << "--- ref for " << ii << " ";
+      if (t > 0) printWordIDVec(ref_ids_buf[ii]);
+      else printWordIDVec(ref_ids);
+      cerr << endl;
+      for (unsigned u = 0; u < samples->size(); u++) {
+        cerr << _p5 << _np << "[" << u << ". '";
+        printWordIDVec((*samples)[u].w);
+        cerr << "'" << endl;
+        cerr << "SCORE=" << (*samples)[u].score << ",model=" << (*samples)[u].model << endl;
+        cerr << "F{" << (*samples)[u].f << "} ]" << endl << endl;
+      }
+    }
score_sum += (*samples)[0].score;
model_sum += (*samples)[0].model;
@@ -320,43 +313,28 @@ main(int argc, char** argv)
for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
it != pairs.end(); it++) {
+ score_t rank_error = it->second.score - it->first.score;
if (!gamma) {
// perceptron
- if (it->first.score - it->second.score < 0) { // rank error
- SparseVector<weight_t> dv = it->second.f - it->first.f;
- dv.add_value(__bias, -1);
- lambdas.plus_eq_v_times_s(dv, eta);
+ if (rank_error > 0) {
+ SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
+ lambdas.plus_eq_v_times_s(diff_vec, eta);
nup++;
}
} else {
// SVM
- score_t rank_error = it->second.score - it->first.score;
- if (rank_error > 0) {
- SparseVector<weight_t> dv = it->second.f - it->first.f;
- dv.add_value(__bias, -1);
- lambdas.plus_eq_v_times_s(dv, eta);
- }
- // regularization
score_t margin = it->first.model - it->second.model;
- if (rank_error || margin < 1) {
- lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta); // reg /= #EXAMPLES or #UPDATES ?
+ if (rank_error > 0 || margin < 1) {
+ SparseVector<weight_t> diff_vec = it->second.f - it->first.f;
+ lambdas.plus_eq_v_times_s(diff_vec, eta);
nup++;
}
+ // regularization
+ lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
}
}
}
- // DEBUG
- vector<weight_t> x;
- lambdas.init_vector(&x);
- //cout << "[" << ii << "]" << endl;
- for (int jj = 0; jj < x.size(); jj++) {
- //if (x[jj] != 0)
- //cout << FD::Convert(jj) << " " << x[jj] << endl;
- }
- //cout << " --- " << endl;
- // /DEBUG
-
++ii;
if (hstreaming) cerr << "reporter:counter:dtrain,sid," << ii << endl;
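
Taken together, the rewritten loop is a pairwise rank update: whenever the pair's second hypothesis beats the first on the metric (rank_error > 0), or in the SVM case the model margin falls below 1, the weights move along the feature difference, and the SVM branch additionally applies L2 regularization scaled by the number of pairs. A self-contained sketch of that logic, with std::map standing in for cdec's SparseVector<weight_t> and the hypothesis struct reduced to the fields used here (names are illustrative, not dtrain's API):

#include <map>
#include <string>
#include <utility>
#include <vector>

typedef std::map<std::string, double> Feats;

struct Hyp { double score, model; Feats f; }; // metric score, model score, features

static Feats diff(const Feats& x, const Feats& y) // x - y
{
  Feats d = x;
  for (Feats::const_iterator it = y.begin(); it != y.end(); ++it)
    d[it->first] -= it->second;
  return d;
}

static void plus_eq_v_times_s(Feats& w, const Feats& v, double s) // w += s*v
{
  for (Feats::const_iterator it = v.begin(); it != v.end(); ++it)
    w[it->first] += s * it->second;
}

void update(Feats& lambdas, const std::vector<std::pair<Hyp, Hyp> >& pairs,
            double eta, double gamma)
{
  for (size_t i = 0; i < pairs.size(); ++i) {
    const Hyp& a = pairs[i].first;
    const Hyp& b = pairs[i].second;
    double rank_error = b.score - a.score;    // > 0: b beats a on the metric
    if (!gamma) {                             // perceptron
      if (rank_error > 0)
        plus_eq_v_times_s(lambdas, diff(b.f, a.f), eta);
    } else {                                  // SVM-style, with margin
      double margin = a.model - b.model;
      if (rank_error > 0 || margin < 1)
        plus_eq_v_times_s(lambdas, diff(b.f, a.f), eta);
      Feats reg = lambdas;                    // copy: don't mutate while iterating
      plus_eq_v_times_s(lambdas, reg, -2 * gamma * eta * (1.0 / pairs.size()));
    }
  }
}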
@@ -375,8 +353,7 @@ main(int argc, char** argv)
// print some stats
score_t score_avg = score_sum/(score_t)in_sz;
score_t model_avg = model_sum/(score_t)in_sz;
- score_t score_diff;
- score_t model_diff;
+ score_t score_diff, model_diff;
if (t > 0) {
score_diff = score_avg - all_scores[t-1].first;
model_diff = model_avg - all_scores[t-1].second;
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index 7c1509e4..f4d32ecb 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -3,6 +3,7 @@
#include <iomanip>
+#include <climits>
#include <string.h>
#include <boost/algorithm/string.hpp>
@@ -58,5 +59,13 @@ inline ostream& _p2(ostream& out) { return out << setprecision(2); }
inline ostream& _p5(ostream& out) { return out << setprecision(5); }
inline ostream& _p9(ostream& out) { return out << setprecision(9); }
+inline void printWordIDVec(const vector<WordID>& v)
+{
+ for (unsigned i = 0; i < v.size(); i++) {
+ cerr << TD::Convert(v[i]);
+ if (i < v.size()-1) cerr << " ";
+ }
+}
+
#endif
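
Worth noting: printWordIDVec writes to cerr, consistent with the verbose dump and the hadoop-streaming counters above, so diagnostic output stays on STDERR and weights can still be streamed cleanly to STDOUT when output is set to '-'.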
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 185d6d90..40f8e03f 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,14 +1,14 @@
decoder_config=test/example/cdec.ini
k=100
N=3
-gamma=0
-epochs=5
+gamma=0.001
+epochs=20
input=test/example/nc-1k-tabs.gz
scorer=stupid_bleu
-output=-
-stop_after=100
-sample_from=kbest
-pair_sampling=all
+output=weights.gz
+#stop_after=100
+sample_from=forest
+pair_sampling=108010
select_weights=VOID
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
tmp=/tmp
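
The new pair_sampling=108010 setting presumably names a 10%/80%/10% multipartite scheme: rank the k samples by metric score, then build pairs between the top tenth and the middle, and between the middle and the bottom tenth. A rough sketch under that assumption (this is a guess at the option's semantics, not dtrain's actual code):

#include <cstddef>
#include <utility>
#include <vector>

// Input: k sample indices, assumed already sorted by metric score, best first.
// Output: (better, worse) index pairs across the 10/80/10 partitions.
std::vector<std::pair<size_t, size_t> > sample_108010(size_t k)
{
  std::vector<std::pair<size_t, size_t> > pairs;
  size_t top = k / 10, bottom = k - k / 10;   // partition boundaries
  for (size_t i = 0; i < top; ++i)            // top 10% vs. middle 80%
    for (size_t j = top; j < bottom; ++j)
      pairs.push_back(std::make_pair(i, j));
  for (size_t i = top; i < bottom; ++i)       // middle 80% vs. bottom 10%
    for (size_t j = bottom; j < k; ++j)
      pairs.push_back(std::make_pair(i, j));
  return pairs;
}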