summary | refs | log | tree | commit | diff
path: root/training/dtrain
diff options
context:
space:
mode:
Diffstat (limited to 'training/dtrain')
-rw-r--r-- training/dtrain/dtrain.cc 32
-rw-r--r-- training/dtrain/dtrain.h 2
-rw-r--r-- training/dtrain/pairsampling.h 1
-rw-r--r-- training/dtrain/score.cc 18
-rw-r--r-- training/dtrain/score.h 18
5 files changed, 42 insertions, 29 deletions
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 0a27a068..b01cf421 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -44,7 +44,7 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("pclr", po::value<string>()->default_value("no"), "use a (simple|adagrad) per-coordinate learning rate")
("batch", po::value<bool>()->zero_tokens(), "do batch optimization")
("repeat", po::value<unsigned>()->default_value(1), "repeat optimization over kbest list this number of times")
- //("test-k-best", po::value<bool>()->zero_tokens(), "check if optimization works (use repeat >= 2)")
+ ("check", po::value<bool>()->zero_tokens(), "produce list of loss differentials")
("noup", po::value<bool>()->zero_tokens(), "do not update weights");
po::options_description cl("Command Line Options");
cl.add_options()
@@ -130,8 +130,8 @@ main(int argc, char** argv)
const score_t approx_bleu_d = cfg["approx_bleu_d"].as<score_t>();
const unsigned max_pairs = cfg["max_pairs"].as<unsigned>();
int repeat = cfg["repeat"].as<unsigned>();
- //bool test_k_best = false;
- //if (cfg.count("test-k-best")) test_k_best = true;
+ bool check = false;
+ if (cfg.count("check")) check = true;
weight_t loss_margin = cfg["loss_margin"].as<weight_t>();
bool batch = false;
if (cfg.count("batch")) batch = true;
@@ -412,27 +412,38 @@ main(int argc, char** argv)
int cur_npairs = pairs.size();
npairs += cur_npairs;
- score_t kbest_loss_first, kbest_loss_last = 0.0;
+ score_t kbest_loss_first = 0.0, kbest_loss_last = 0.0;
+
+ if (check) repeat = 2;
+ vector<float> losses; // for check
for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
it != pairs.end(); it++) {
score_t model_diff = it->first.model - it->second.model;
- kbest_loss_first += max(0.0, -1.0 * model_diff);
+ score_t loss = max(0.0, -1.0 * model_diff);
+ losses.push_back(loss);
+ kbest_loss_first += loss;
}
+ score_t kbest_loss = 0.0;
for (int ki=0; ki < repeat; ki++) {
- score_t kbest_loss = 0.0; // test-k-best
SparseVector<weight_t> lambdas_copy; // for l1 regularization
SparseVector<weight_t> sum_up; // for pclr
if (l1naive||l1clip||l1cumul) lambdas_copy = lambdas;
+ unsigned pair_idx = 0; // for check
for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
it != pairs.end(); it++) {
score_t model_diff = it->first.model - it->second.model;
+ score_t loss = max(0.0, -1.0 * model_diff);
+
+ if (check && ki == 1) cout << losses[pair_idx] - loss << endl;
+ pair_idx++;
+
if (repeat > 1) {
model_diff = lambdas.dot(it->first.f) - lambdas.dot(it->second.f);
- kbest_loss += max(0.0, -1.0 * model_diff);
+ kbest_loss += loss;
}
bool rank_error = false;
score_t margin;
@@ -449,7 +460,7 @@ main(int argc, char** argv)
if (rank_error || margin < loss_margin) {
SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
if (batch) {
- batch_loss += max(0., -1.0*model_diff);
+ batch_loss += max(0., -1.0 * model_diff);
batch_updates += diff_vec;
continue;
}
@@ -529,9 +540,8 @@ main(int argc, char** argv)
if (ki==repeat-1) { // done
kbest_loss_last = kbest_loss;
if (repeat > 1) {
- score_t best_score = -1.;
score_t best_model = -std::numeric_limits<score_t>::max();
- unsigned best_idx;
+ unsigned best_idx = 0;
for (unsigned i=0; i < samples->size(); i++) {
score_t s = lambdas.dot((*samples)[i].f);
if (s > best_model) {
@@ -634,6 +644,8 @@ main(int argc, char** argv)
Weights::WriteToFile(w_fn, decoder_weights, true);
}
+ if (check) cout << "---" << endl;
+
} // outer loop
if (average) w_average /= (weight_t)T;
diff --git a/training/dtrain/dtrain.h b/training/dtrain/dtrain.h
index ccb5ad4d..eb23b813 100644
--- a/training/dtrain/dtrain.h
+++ b/training/dtrain/dtrain.h
@@ -64,7 +64,7 @@ struct LocalScorer
vector<score_t> w_;
virtual score_t
- Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned rank, const unsigned src_len)=0;
+ Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned src_len)=0;
virtual void Reset() {} // only for ApproxBleuScorer, LinearBleuScorer
diff --git a/training/dtrain/pairsampling.h b/training/dtrain/pairsampling.h
index 3f67e209..1a3c498c 100644
--- a/training/dtrain/pairsampling.h
+++ b/training/dtrain/pairsampling.h
@@ -112,6 +112,7 @@ _PRO_cmp_pair_by_diff_d(pair<ScoredHyp,ScoredHyp> a, pair<ScoredHyp,ScoredHyp> b
inline void
PROsampling(vector<ScoredHyp>* s, vector<pair<ScoredHyp,ScoredHyp> >& training, score_t threshold, unsigned max, bool _unused=false, float _also_unused=0)
{
+ sort(s->begin(), s->end(), cmp_hyp_by_score_d);
unsigned max_count = 5000, count = 0, sz = s->size();
bool b = false;
for (unsigned i = 0; i < sz-1; i++) {
diff --git a/training/dtrain/score.cc b/training/dtrain/score.cc
index 96d6e10a..127f34d2 100644
--- a/training/dtrain/score.cc
+++ b/training/dtrain/score.cc
@@ -32,7 +32,7 @@ BleuScorer::Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref
}
score_t
-BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+BleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -52,7 +52,7 @@ BleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* NOTE: 0 iff no 1gram match ('grounded')
*/
score_t
-StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+StupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -81,7 +81,7 @@ StupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* (Nakov et al. '12)
*/
score_t
-FixedStupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+FixedStupidBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -112,7 +112,7 @@ FixedStupidBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* NOTE: max is 0.9375 (with N=4)
*/
score_t
-SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+SmoothBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -143,7 +143,7 @@ SmoothBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* sum up Ngram precisions
*/
score_t
-SumBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+SumBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -167,7 +167,7 @@ SumBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* sum up exp(Ngram precisions)
*/
score_t
-SumExpBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+SumExpBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -191,7 +191,7 @@ SumExpBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* sum up exp(weight * log(Ngram precisions))
*/
score_t
-SumWhateverBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+SumWhateverBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned /*rank*/, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -224,7 +224,7 @@ SumWhateverBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
* No scaling by src len.
*/
score_t
-ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+ApproxBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned rank, const unsigned src_len)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
@@ -255,7 +255,7 @@ ApproxBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
*
*/
score_t
-LinearBleuScorer::Score(vector<WordID>& hyp, vector<WordID>& ref,
+LinearBleuScorer::Score(const vector<WordID>& hyp, const vector<WordID>& ref,
const unsigned rank, const unsigned /*src_len*/)
{
unsigned hyp_len = hyp.size(), ref_len = ref.size();
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index 53e970ba..1cdd3fa9 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -138,43 +138,43 @@ make_ngram_counts(const vector<WordID>& hyp, const vector<WordID>& ref, const un
struct BleuScorer : public LocalScorer
{
score_t Bleu(NgramCounts& counts, const unsigned hyp_len, const unsigned ref_len);
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {}
};
struct StupidBleuScorer : public LocalScorer
{
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {}
};
struct FixedStupidBleuScorer : public LocalScorer
{
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {}
};
struct SmoothBleuScorer : public LocalScorer
{
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {}
};
struct SumBleuScorer : public LocalScorer
{
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {}
};
struct SumExpBleuScorer : public LocalScorer
{
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {}
};
struct SumWhateverBleuScorer : public LocalScorer
{
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned /*rank*/, const unsigned /*src_len*/);
void Reset() {};
};
@@ -194,7 +194,7 @@ struct ApproxBleuScorer : public BleuScorer
glob_hyp_len_ = glob_ref_len_ = glob_src_len_ = 0.;
}
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned rank, const unsigned src_len);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned src_len);
};
struct LinearBleuScorer : public BleuScorer
@@ -207,7 +207,7 @@ struct LinearBleuScorer : public BleuScorer
onebest_counts_.One();
}
- score_t Score(vector<WordID>& hyp, vector<WordID>& ref, const unsigned rank, const unsigned /*src_len*/);
+ score_t Score(const vector<WordID>& hyp, const vector<WordID>& ref, const unsigned rank, const unsigned /*src_len*/);
inline void Reset() {
onebest_len_ = 1;