Diffstat (limited to 'training/dtrain')
 training/dtrain/dtrain.cc                    | 36
 training/dtrain/examples/standard/dtrain.ini |  3
 2 files changed, 28 insertions(+), 11 deletions(-)
diff --git a/training/dtrain/dtrain.cc b/training/dtrain/dtrain.cc
index 34c0a54a..2d090666 100644
--- a/training/dtrain/dtrain.cc
+++ b/training/dtrain/dtrain.cc
@@ -372,7 +372,8 @@ main(int argc, char** argv)
         PROsampling(samples, pairs, pair_threshold, max_pairs);
       npairs += pairs.size();
-      SparseVector<weight_t> lambdas_copy;
+      SparseVector<weight_t> lambdas_copy; // for l1 regularization
+      SparseVector<weight_t> sum_up; // for pclr
       if (l1naive||l1clip||l1cumul) lambdas_copy = lambdas;
 
       for (vector<pair<ScoredHyp,ScoredHyp> >::iterator it = pairs.begin();
@@ -392,20 +393,24 @@ main(int argc, char** argv)
         if (rank_error || margin < loss_margin) {
           SparseVector<weight_t> diff_vec = it->first.f - it->second.f;
           if (pclr) {
-            SparseVector<weight_t>::iterator jt = diff_vec.begin();
-            for (; jt != diff_vec.end(); ++it) {
-              jt->second *= max(0.0000001, eta/(eta+learning_rates[jt->first])); // FIXME
-              learning_rates[jt->first]++;
-            }
-            lambdas += diff_vec;
-            } else {
-              lambdas.plus_eq_v_times_s(diff_vec, eta);
-            }
+            sum_up += diff_vec;
+          } else {
+            lambdas.plus_eq_v_times_s(diff_vec, eta);
+          }
           if (gamma)
             lambdas.plus_eq_v_times_s(lambdas, -2*gamma*eta*(1./npairs));
         }
       }
 
+      // per-coordinate learning rate
+      if (pclr) {
+        SparseVector<weight_t>::iterator it = sum_up.begin();
+        for (; it != lambdas.end(); ++it) {
+          lambdas[it->first] += it->second * max(0.00000001, eta/(eta+learning_rates[it->first]));
+          learning_rates[it->first]++;
+        }
+      }
+
       // l1 regularization
       // please note that this regularizations happen
       // after a _sentence_ -- not after each example/pair!
@@ -413,6 +418,8 @@ main(int argc, char** argv)
         SparseVector<weight_t>::iterator it = lambdas.begin();
         for (; it != lambdas.end(); ++it) {
           if (!lambdas_copy.get(it->first) || lambdas_copy.get(it->first)!=it->second) {
+              it->second *= max(0.0000001, eta/(eta+learning_rates[it->first])); // FIXME
+              learning_rates[it->first]++;
             it->second -= sign(it->second) * l1_reg;
           }
         }
@@ -530,6 +537,15 @@ main(int argc, char** argv)
     Weights::WriteToFile(w_fn, dense_weights, true);
   }
 
+  WriteFile of("-");
+  ostream& o = *of.stream();
+  o << "<<<<<<<<<<<<<<<<<<<<<<<<\n";
+  for (SparseVector<weight_t>::iterator it = learning_rates.begin(); it != learning_rates.end(); ++it) {
+    if (it->second == 0) continue;
+    o << FD::Convert(it->first) << '\t' << it->second << endl;
+  }
+  o << ">>>>>>>>>>>>>>>>>>>>>>>>>\n";
+
   } // outer loop
 
   if (average) w_average /= (weight_t)T;
diff --git a/training/dtrain/examples/standard/dtrain.ini b/training/dtrain/examples/standard/dtrain.ini
index 23e94285..07350a0b 100644
--- a/training/dtrain/examples/standard/dtrain.ini
+++ b/training/dtrain/examples/standard/dtrain.ini
@@ -1,6 +1,6 @@
 input=./nc-wmt11.de.gz
 refs=./nc-wmt11.en.gz
-output=-                  # a weights file (add .gz for gzip compression) or STDOUT '-'
+output=asdf               # a weights file (add .gz for gzip compression) or STDOUT '-'
 select_weights=VOID       # output average (over epochs) weight vector
 decoder_config=./cdec.ini # config for cdec
 # weights for these features will be printed on each iteration
@@ -22,3 +22,4 @@ pair_sampling=XYX        #
 hi_lo=0.1                # 10 vs 80 vs 10 and 80 vs 10 here
 pair_threshold=0         # minimum distance in BLEU (here: > 0)
 loss_margin=0            # update if correctly ranked, but within this margin
+pclr=1
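
The committed change replaces the old in-place pclr update (whose removed loop
incremented it instead of jt, hence the FIXME) with a two-phase scheme: violated
pairs first accumulate their feature-difference vectors in sum_up, and once a
sentence's pairs are processed, each touched coordinate is stepped with its own
decayed rate eta/(eta + n), where n counts how often that feature has been
updated so far. Below is a minimal, self-contained sketch of that update step;
it stands std::map in for cdec's SparseVector<weight_t>, bounds the loop with
sum_up.end() (the hunk above compares against lambdas.end()), and apart from the
names eta, lambdas, learning_rates, and sum_up taken from the patch, everything
is illustrative.

#include <algorithm>
#include <iostream>
#include <map>

typedef std::map<int, double> SparseVec; // feature id -> value

int main()
{
  double eta = 0.0001;      // base learning rate
  SparseVec lambdas;        // weight vector
  SparseVec learning_rates; // per-feature update counts
  SparseVec sum_up;         // summed difference vectors of one sentence

  // Pretend two violated pairs contributed their feature differences:
  sum_up[1] += 0.5;
  sum_up[1] += 0.25;
  sum_up[2] -= 1.0;

  // The pclr step: scale each coordinate by eta/(eta + #updates), so
  // frequently updated features take progressively smaller steps.
  for (SparseVec::iterator it = sum_up.begin(); it != sum_up.end(); ++it) {
    lambdas[it->first] +=
        it->second * std::max(0.00000001, eta / (eta + learning_rates[it->first]));
    learning_rates[it->first]++;
  }

  for (SparseVec::iterator it = lambdas.begin(); it != lambdas.end(); ++it)
    std::cout << it->first << '\t' << it->second << std::endl;
  return 0;
}

Since eta/(eta + n) shrinks as n grows, frequently updated features move in ever
smaller steps while rare features keep stepping at nearly the base rate -- the
same intuition as AdaGrad-style per-coordinate schedules, with a simple update
count standing in for accumulated squared gradients.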
