From 43e7ecdca09f4125346f64d45e44f440ac964421 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 25 Sep 2011 20:23:09 +0200 Subject: removed some quirks, less boost, prettier code, score_t --- dtrain/test/cdec_toy/cdec.ini | 1 + dtrain/test/example/cdec.ini | 7 ++++++ dtrain/test/example/dtrain.ini | 11 +++++++++ dtrain/test/example/nc-1k-tabs.gz | Bin 0 -> 21185883 bytes dtrain/test/example/nc-1k.gz | Bin 0 -> 21474865 bytes dtrain/test/example/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes dtrain/test/example/weights.gz | Bin 0 -> 255 bytes dtrain/test/log_reg_dyer/bin_class.cc | 4 ++++ dtrain/test/log_reg_dyer/bin_class.h | 22 +++++++++++++++++ dtrain/test/log_reg_dyer/log_reg.cc | 39 +++++++++++++++++++++++++++++++ dtrain/test/log_reg_dyer/log_reg.h | 14 +++++++++++ dtrain/test/logreg/bin_class.cc | 4 ---- dtrain/test/logreg/bin_class.h | 22 ----------------- dtrain/test/logreg/log_reg.cc | 39 ------------------------------- dtrain/test/logreg/log_reg.h | 14 ----------- dtrain/test/toy_example/dtrain.ini | 2 +- 16 files changed, 99 insertions(+), 80 deletions(-) create mode 100644 dtrain/test/example/cdec.ini create mode 100644 dtrain/test/example/dtrain.ini create mode 100644 dtrain/test/example/nc-1k-tabs.gz create mode 100644 dtrain/test/example/nc-1k.gz create mode 100644 dtrain/test/example/nc-wmt11.en.srilm.gz create mode 100644 dtrain/test/example/weights.gz create mode 100644 dtrain/test/log_reg_dyer/bin_class.cc create mode 100644 dtrain/test/log_reg_dyer/bin_class.h create mode 100644 dtrain/test/log_reg_dyer/log_reg.cc create mode 100644 dtrain/test/log_reg_dyer/log_reg.h delete mode 100644 dtrain/test/logreg/bin_class.cc delete mode 100644 dtrain/test/logreg/bin_class.h delete mode 100644 dtrain/test/logreg/log_reg.cc delete mode 100644 dtrain/test/logreg/log_reg.h (limited to 'dtrain/test') diff --git a/dtrain/test/cdec_toy/cdec.ini b/dtrain/test/cdec_toy/cdec.ini index 3a6bab68..9eb34512 100644 --- a/dtrain/test/cdec_toy/cdec.ini +++ b/dtrain/test/cdec_toy/cdec.ini @@ -1,3 +1,4 @@ formalism=scfg grammar=../dtrain/test/toy_cdec/grammar add_pass_through_rules=true +weights=../dtrain/test/toy_cdec/weights diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini new file mode 100644 index 00000000..cdc8a8bb --- /dev/null +++ b/dtrain/test/example/cdec.ini @@ -0,0 +1,7 @@ +formalism=scfg +add_pass_through_rules=true +cubepruning_pop_limit=30 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /home/pks/z/X/x/dtrain/test/example/nc-wmt11.en.srilm.gz +#feature_function=RuleIdentityFeatures diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini new file mode 100644 index 00000000..aee3c89e --- /dev/null +++ b/dtrain/test/example/dtrain.ini @@ -0,0 +1,11 @@ +decoder_config=test/example/cdec.ini +ksamples=100 +ngrams=3 +epochs=1000 +input=test/example/nc-1k.gz +scorer=stupid_bleu +output=test/example/weights.gz +stop_after=10 +sample_from=kbest +pair_sampling=all +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough diff --git a/dtrain/test/example/nc-1k-tabs.gz b/dtrain/test/example/nc-1k-tabs.gz new file mode 100644 index 00000000..45496cd8 Binary files /dev/null and b/dtrain/test/example/nc-1k-tabs.gz differ diff --git a/dtrain/test/example/nc-1k.gz b/dtrain/test/example/nc-1k.gz new file mode 100644 index 00000000..f638a166 Binary files /dev/null and b/dtrain/test/example/nc-1k.gz differ diff --git a/dtrain/test/example/nc-wmt11.en.srilm.gz b/dtrain/test/example/nc-wmt11.en.srilm.gz new file mode 100644 index 00000000..7ce81057 Binary files /dev/null and b/dtrain/test/example/nc-wmt11.en.srilm.gz differ diff --git a/dtrain/test/example/weights.gz b/dtrain/test/example/weights.gz new file mode 100644 index 00000000..21157427 Binary files /dev/null and b/dtrain/test/example/weights.gz differ diff --git a/dtrain/test/log_reg_dyer/bin_class.cc b/dtrain/test/log_reg_dyer/bin_class.cc new file mode 100644 index 00000000..19bcde25 --- /dev/null +++ b/dtrain/test/log_reg_dyer/bin_class.cc @@ -0,0 +1,4 @@ +#include "bin_class.h" + +Objective::~Objective() {} + diff --git a/dtrain/test/log_reg_dyer/bin_class.h b/dtrain/test/log_reg_dyer/bin_class.h new file mode 100644 index 00000000..3466109a --- /dev/null +++ b/dtrain/test/log_reg_dyer/bin_class.h @@ -0,0 +1,22 @@ +#ifndef _BIN_CLASS_H_ +#define _BIN_CLASS_H_ + +#include +#include "sparse_vector.h" + +struct TrainingInstance { + // TODO add other info? loss for MIRA-type updates? + SparseVector x_feature_map; + bool y; +}; + +struct Objective { + virtual ~Objective(); + + // returns f(x) and f'(x) + virtual double ObjectiveAndGradient(const SparseVector& x, + const std::vector& training_instances, + SparseVector* g) const = 0; +}; + +#endif diff --git a/dtrain/test/log_reg_dyer/log_reg.cc b/dtrain/test/log_reg_dyer/log_reg.cc new file mode 100644 index 00000000..ec2331fe --- /dev/null +++ b/dtrain/test/log_reg_dyer/log_reg.cc @@ -0,0 +1,39 @@ +#include "log_reg.h" + +#include +#include + +#include "sparse_vector.h" + +using namespace std; + +double LogisticRegression::ObjectiveAndGradient(const SparseVector& x, + const vector& training_instances, + SparseVector* g) const { + double cll = 0; + for (int i = 0; i < training_instances.size(); ++i) { + const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0] + double lp_false = dotprod; + double lp_true = -dotprod; + if (0 < lp_true) { + lp_true += log1p(exp(-lp_true)); + lp_false = log1p(exp(lp_false)); + } else { + lp_true = log1p(exp(lp_true)); + lp_false += log1p(exp(-lp_false)); + } + lp_true *= -1; + lp_false *= -1; + if (training_instances[i].y) { // true label + cll -= lp_true; + (*g) -= training_instances[i].x_feature_map * exp(lp_false); + // (*g)[0] -= exp(lp_false); // bias + } else { // false label + cll -= lp_false; + (*g) += training_instances[i].x_feature_map * exp(lp_true); + // g += corpus[i].second * exp(lp_true); + } + } + return cll; +} + diff --git a/dtrain/test/log_reg_dyer/log_reg.h b/dtrain/test/log_reg_dyer/log_reg.h new file mode 100644 index 00000000..ecc560b8 --- /dev/null +++ b/dtrain/test/log_reg_dyer/log_reg.h @@ -0,0 +1,14 @@ +#ifndef _LOG_REG_H_ +#define _LOG_REG_H_ + +#include +#include "sparse_vector.h" +#include "bin_class.h" + +struct LogisticRegression : public Objective { + double ObjectiveAndGradient(const SparseVector& x, + const std::vector& training_instances, + SparseVector* g) const; +}; + +#endif diff --git a/dtrain/test/logreg/bin_class.cc b/dtrain/test/logreg/bin_class.cc deleted file mode 100644 index 19bcde25..00000000 --- a/dtrain/test/logreg/bin_class.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "bin_class.h" - -Objective::~Objective() {} - diff --git a/dtrain/test/logreg/bin_class.h b/dtrain/test/logreg/bin_class.h deleted file mode 100644 index 3466109a..00000000 --- a/dtrain/test/logreg/bin_class.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _BIN_CLASS_H_ -#define _BIN_CLASS_H_ - -#include -#include "sparse_vector.h" - -struct TrainingInstance { - // TODO add other info? loss for MIRA-type updates? - SparseVector x_feature_map; - bool y; -}; - -struct Objective { - virtual ~Objective(); - - // returns f(x) and f'(x) - virtual double ObjectiveAndGradient(const SparseVector& x, - const std::vector& training_instances, - SparseVector* g) const = 0; -}; - -#endif diff --git a/dtrain/test/logreg/log_reg.cc b/dtrain/test/logreg/log_reg.cc deleted file mode 100644 index ec2331fe..00000000 --- a/dtrain/test/logreg/log_reg.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "log_reg.h" - -#include -#include - -#include "sparse_vector.h" - -using namespace std; - -double LogisticRegression::ObjectiveAndGradient(const SparseVector& x, - const vector& training_instances, - SparseVector* g) const { - double cll = 0; - for (int i = 0; i < training_instances.size(); ++i) { - const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0] - double lp_false = dotprod; - double lp_true = -dotprod; - if (0 < lp_true) { - lp_true += log1p(exp(-lp_true)); - lp_false = log1p(exp(lp_false)); - } else { - lp_true = log1p(exp(lp_true)); - lp_false += log1p(exp(-lp_false)); - } - lp_true *= -1; - lp_false *= -1; - if (training_instances[i].y) { // true label - cll -= lp_true; - (*g) -= training_instances[i].x_feature_map * exp(lp_false); - // (*g)[0] -= exp(lp_false); // bias - } else { // false label - cll -= lp_false; - (*g) += training_instances[i].x_feature_map * exp(lp_true); - // g += corpus[i].second * exp(lp_true); - } - } - return cll; -} - diff --git a/dtrain/test/logreg/log_reg.h b/dtrain/test/logreg/log_reg.h deleted file mode 100644 index ecc560b8..00000000 --- a/dtrain/test/logreg/log_reg.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _LOG_REG_H_ -#define _LOG_REG_H_ - -#include -#include "sparse_vector.h" -#include "bin_class.h" - -struct LogisticRegression : public Objective { - double ObjectiveAndGradient(const SparseVector& x, - const std::vector& training_instances, - SparseVector* g) const; -}; - -#endif diff --git a/dtrain/test/toy_example/dtrain.ini b/dtrain/test/toy_example/dtrain.ini index 0cc222e1..3ab4f8d4 100644 --- a/dtrain/test/toy_example/dtrain.ini +++ b/dtrain/test/toy_example/dtrain.ini @@ -1,5 +1,5 @@ decoder_config=test/toy_example/cdec.ini -kbest=4 +ksamples=4 ngrams=3 epochs=2 input=test/toy_example/toy.in -- cgit v1.2.3