From bb602ade9c970272f281cd8a42b643cc09a9fa29 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Mon, 14 Nov 2011 01:20:50 +0100
Subject: l1 regularization
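
Add three variants of L1 regularization to the online weight updates,
selected via the new l1_reg option: 'naive' subtracts a constant penalty
from every active weight, 'clip' does the same but clips at zero so a
weight cannot flip sign, and 'cumul' implements the cumulative penalty
method of Tsuruoka et al. (2009), which tracks per feature how much
penalty has actually been applied so far. The penalty strength is set
with l1_reg_strength.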
---
dtrain/dtrain.cc | 51 +++++++++++++++++++++++++++++++++++++++++-
dtrain/dtrain.h | 6 +++++
dtrain/test/example/dtrain.ini | 6 +++--
3 files changed, 60 insertions(+), 3 deletions(-)
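
[Note: a minimal standalone sketch of the three update rules, for review
purposes only. It assumes a plain std::map in place of cdec's
SparseVector<weight_t>; the function l1_step and its parameter names are
made up for illustration.]

    #include <algorithm>
    #include <map>
    #include <string>

    typedef double weight_t;

    // One regularization step over all active weights, applied once per input.
    // u is the accumulated penalty rate (inputs seen so far * l1 strength);
    // q records, per feature, the penalty actually applied ('cumul' only).
    void l1_step(std::map<int, weight_t>& w, std::map<int, weight_t>& q,
                 const std::string& mode, weight_t l1_strength, weight_t u)
    {
      for (std::map<int, weight_t>::iterator it = w.begin(); it != w.end(); ++it) {
        weight_t v = it->second;
        if (v == 0) continue;           // only penalize active weights
        if (mode == "naive") {          // may overshoot past zero
          it->second = v - (v > 0 ? l1_strength : -l1_strength);
        } else if (mode == "clip") {    // clip at zero instead of crossing it
          it->second = (v > 0) ? std::max(0., v - l1_strength)
                               : std::min(0., v + l1_strength);
        } else if (mode == "cumul") {   // Tsuruoka et al. (2009)
          weight_t w_new = (v > 0) ? std::max(0., v - (u + q[it->first]))
                                   : std::min(0., v + (u - q[it->first]));
          q[it->first] += w_new - v;    // remember what was actually paid
          it->second = w_new;
        }
      }
    }

To try the clipped variant on the example config, uncomment the two new
lines in dtrain/test/example/dtrain.ini:

    l1_reg=clip
    l1_reg_strength=0.00001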
diff --git a/dtrain/dtrain.cc b/dtrain/dtrain.cc
index ca5f0c5e..448e639c 100644
--- a/dtrain/dtrain.cc
+++ b/dtrain/dtrain.cc
@@ -26,6 +26,8 @@ dtrain_init(int argc, char** argv, po::variables_map* cfg)
("select_weights", po::value()->default_value("last"), "output 'best' or 'last' weights ('VOID' to throw away)")
("keep_w", po::value()->zero_tokens(), "protocol weights for each iteration")
("unit_weight_vector", po::value()->zero_tokens(), "Rescale weight vector after each input")
+ ("l1_reg", po::value()->default_value("no"), "apply l1 regularization as in Tsuroka et al 2010")
+ ("l1_reg_strength", po::value(), "l1 regularization strength")
#ifdef DTRAIN_LOCAL
("refs,r", po::value(), "references in local mode")
#endif
@@ -155,13 +157,25 @@ main(int argc, char** argv)
// init weights
vector<weight_t>& dense_weights = decoder.CurrentWeightVector();
- SparseVector<weight_t> lambdas;
+ SparseVector<weight_t> lambdas, cumulative_penalties;
if (cfg.count("input_weights")) Weights::InitFromFile(cfg["input_weights"].as<string>(), &dense_weights);
Weights::InitSparseVector(dense_weights, &lambdas);
// meta params for perceptron, SVM
weight_t eta = cfg["learning_rate"].as<weight_t>();
weight_t gamma = cfg["gamma"].as<weight_t>();
+ // l1 regularization
+ bool l1naive = false;
+ bool l1clip = false;
+ bool l1cumul = false;
+ weight_t l1_reg = 0;
+ if (cfg["l1_reg"].as() != "no") {
+ string s = cfg["l1_reg"].as();
+ if (s == "naive") l1naive = true;
+ else if (s == "clip") l1clip = true;
+ else if (s == "cumul") l1cumul = true;
+ l1_reg = cfg["l1_reg_strength"].as();
+ }
// output
string output_fn = cfg["output"].as<string>();
@@ -388,6 +402,41 @@ main(int argc, char** argv)
lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
}
}
+
+ // reset cumulative_penalties after 1 iter?
+ // do this only once per INPUT (not per pair)
+ if (l1naive) {
+ for (unsigned d = 0; d < lambdas.size(); d++) {
+ weight_t v = lambdas.get(d);
+ lambdas.set_value(d, v - sign(v) * l1_reg);
+ }
+ } else if (l1clip) {
+ for (unsigned d = 0; d < lambdas.size(); d++) {
+ if (lambdas.nonzero(d)) {
+ weight_t v = lambdas.get(d);
+ if (v > 0) {
+ lambdas.set_value(d, max(0., v - l1_reg));
+ } else {
+ lambdas.set_value(d, min(0., v + l1_reg));
+ }
+ }
+ }
+ } else if (l1cumul) {
+ weight_t acc_penalty = (ii+1) * l1_reg; // ii is the index of the current input
+ for (unsigned d = 0; d < lambdas.size(); d++) {
+ if (lambdas.nonzero(d)) {
+ weight_t v = lambdas.get(d);
+ weight_t penalty = 0; // despite the name, this holds the clipped new weight
+ if (v > 0) {
+ penalty = max(0., v-(acc_penalty + cumulative_penalties.get(d)));
+ } else {
+ penalty = min(0., v+(acc_penalty - cumulative_penalties.get(d)));
+ }
+ lambdas.set_value(d, penalty);
+ cumulative_penalties.set_value(d, cumulative_penalties.get(d)+(penalty-v)); // accumulate the penalty actually applied, as in Tsuruoka et al. (2009)
+ }
+ }
+ }
}
if (unit_weight_vector && sample_from == "forest") lambdas /= lambdas.l2norm();
diff --git a/dtrain/dtrain.h b/dtrain/dtrain.h
index 84f3f1f5..cfc3f460 100644
--- a/dtrain/dtrain.h
+++ b/dtrain/dtrain.h
@@ -85,5 +85,11 @@ inline void printWordIDVec(vector<WordID>& v)
}
}
+template <typename T>
+inline T sign(T z) {
+ if (z == 0) return 0;
+ return z < 0 ? -1 : +1;
+}
+
#endif
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 95eeb8e5..900878a5 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -6,11 +6,13 @@ epochs=20
input=test/example/nc-1k-tabs.gz
scorer=stupid_bleu
output=weights.gz
-stop_after=10
+stop_after=100
sample_from=forest
pair_sampling=108010
select_weights=VOID
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
tmp=/tmp
-unit_weight_vector=true
+#unit_weight_vector=
keep_w=true
+#l1_reg=clip
+#l1_reg_strength=0.00001
--
cgit v1.2.3