diff options
Diffstat (limited to 'dtrain/test')
| -rw-r--r-- | dtrain/test/EXAMPLE/cdec.ini | 1 | ||||
| -rw-r--r-- | dtrain/test/EXAMPLE/dtrain.ini | 6 | ||||
| -rw-r--r-- | dtrain/test/log_reg/bin_class.cc | 4 | ||||
| -rw-r--r-- | dtrain/test/log_reg/bin_class.h | 22 | ||||
| -rw-r--r-- | dtrain/test/log_reg/log_reg.cc | 39 | ||||
| -rw-r--r-- | dtrain/test/log_reg/log_reg.h | 14 | ||||
| -rw-r--r-- | dtrain/test/nc-wmt11/dtrain.ini | 2 | ||||
| -rw-r--r-- | dtrain/test/toy.dtrain.ini | 3 | ||||
| -rw-r--r-- | dtrain/test/toy.in | 4 | ||||
| -rw-r--r-- | dtrain/test/toy_cdec/cdec.ini | 3 | ||||
| -rw-r--r-- | dtrain/test/toy_cdec/grammar | 12 | ||||
| -rw-r--r-- | dtrain/test/toy_cdec/in | 1 | ||||
| -rw-r--r-- | dtrain/test/toy_cdec/weights | 2 | 
13 files changed, 106 insertions, 7 deletions
| diff --git a/dtrain/test/EXAMPLE/cdec.ini b/dtrain/test/EXAMPLE/cdec.ini index b6e92b5f..e57138b0 100644 --- a/dtrain/test/EXAMPLE/cdec.ini +++ b/dtrain/test/EXAMPLE/cdec.ini @@ -2,5 +2,6 @@ formalism=scfg  add_pass_through_rules=true  feature_function=WordPenalty  cubepruning_pop_limit=30 +feature_function=KLanguageModel data/nc-wmt11.en.srilm.gz  scfg_max_span_limit=15 diff --git a/dtrain/test/EXAMPLE/dtrain.ini b/dtrain/test/EXAMPLE/dtrain.ini index 1467b332..ffafd0b8 100644 --- a/dtrain/test/EXAMPLE/dtrain.ini +++ b/dtrain/test/EXAMPLE/dtrain.ini @@ -1,10 +1,10 @@  decoder_config=test/EXAMPLE/cdec.ini  kbest=100  ngrams=3 -epochs=22 +epochs=8  input=test/EXAMPLE/dtrain.nc-1k  scorer=approx_bleu  output=test/EXAMPLE/weights.gz -stop_after=5 -wprint=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 +stop_after=1000 +wprint=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough diff --git a/dtrain/test/log_reg/bin_class.cc b/dtrain/test/log_reg/bin_class.cc new file mode 100644 index 00000000..19bcde25 --- /dev/null +++ b/dtrain/test/log_reg/bin_class.cc @@ -0,0 +1,4 @@ +#include "bin_class.h" + +Objective::~Objective() {} + diff --git a/dtrain/test/log_reg/bin_class.h b/dtrain/test/log_reg/bin_class.h new file mode 100644 index 00000000..3466109a --- /dev/null +++ b/dtrain/test/log_reg/bin_class.h @@ -0,0 +1,22 @@ +#ifndef _BIN_CLASS_H_ +#define _BIN_CLASS_H_ + +#include <vector> +#include "sparse_vector.h" + +struct TrainingInstance { +  // TODO add other info? loss for MIRA-type updates? +  SparseVector<double> x_feature_map; +  bool y; +}; + +struct Objective { +  virtual ~Objective(); + +  // returns f(x) and f'(x) +  virtual double ObjectiveAndGradient(const SparseVector<double>& x, +                  const std::vector<TrainingInstance>& training_instances, +                  SparseVector<double>* g) const = 0; +}; + +#endif diff --git a/dtrain/test/log_reg/log_reg.cc b/dtrain/test/log_reg/log_reg.cc new file mode 100644 index 00000000..ec2331fe --- /dev/null +++ b/dtrain/test/log_reg/log_reg.cc @@ -0,0 +1,39 @@ +#include "log_reg.h" + +#include <vector> +#include <cmath> + +#include "sparse_vector.h" + +using namespace std; + +double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x, +                              const vector<TrainingInstance>& training_instances, +                              SparseVector<double>* g) const { +  double cll = 0; +  for (int i = 0; i < training_instances.size(); ++i) { +    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0] +    double lp_false = dotprod; +    double lp_true = -dotprod; +    if (0 < lp_true) { +      lp_true += log1p(exp(-lp_true)); +      lp_false = log1p(exp(lp_false)); +    } else { +      lp_true = log1p(exp(lp_true)); +      lp_false += log1p(exp(-lp_false)); +    } +    lp_true *= -1; +    lp_false *= -1; +    if (training_instances[i].y) {  // true label +      cll -= lp_true; +      (*g) -= training_instances[i].x_feature_map * exp(lp_false); +      // (*g)[0] -= exp(lp_false); // bias +    } else {                  // false label +      cll -= lp_false; +      (*g) += training_instances[i].x_feature_map * exp(lp_true); +      // g += corpus[i].second * exp(lp_true); +    } +  } +  return cll; +} + diff --git a/dtrain/test/log_reg/log_reg.h b/dtrain/test/log_reg/log_reg.h new file mode 100644 index 00000000..ecc560b8 --- /dev/null +++ b/dtrain/test/log_reg/log_reg.h @@ -0,0 +1,14 @@ +#ifndef _LOG_REG_H_ +#define _LOG_REG_H_ + +#include <vector> +#include "sparse_vector.h" +#include "bin_class.h" + +struct LogisticRegression : public Objective { +  double ObjectiveAndGradient(const SparseVector<double>& x, +                              const std::vector<TrainingInstance>& training_instances, +                              SparseVector<double>* g) const; +}; + +#endif diff --git a/dtrain/test/nc-wmt11/dtrain.ini b/dtrain/test/nc-wmt11/dtrain.ini index 51033f2d..ddbf5da7 100644 --- a/dtrain/test/nc-wmt11/dtrain.ini +++ b/dtrain/test/nc-wmt11/dtrain.ini @@ -2,7 +2,7 @@ decoder_config=test/nc-wmt11/cdec.ini  kbest=100  ngrams=3  epochs=8 -input=data/nc-wmt11.loo.localf.p0.500.rule-id #nc-wmt11-de-en-dyer-cs-joshua.tok.lc.fixamp1.loo.psg.dtrain.localf.p0 +input=data/nc-wmt11.loo.localf.p0.500.rule-id  scorer=approx_bleu  output=data/w/nc-wmt11.loo.p0.weights.gz  #stop_after=100 diff --git a/dtrain/test/toy.dtrain.ini b/dtrain/test/toy.dtrain.ini index cacb3a2c..35f76281 100644 --- a/dtrain/test/toy.dtrain.ini +++ b/dtrain/test/toy.dtrain.ini @@ -2,8 +2,9 @@ decoder_config=test/cdec.ini  kbest=4  ngrams=1  epochs=3 -input=data/in.toy +input=test/toy.in  scorer=bleu  output=toy.gz  #stop_after=1000 +wprint=logp use_shell use_house PassThrough diff --git a/dtrain/test/toy.in b/dtrain/test/toy.in index 63f97158..989a1f77 100644 --- a/dtrain/test/toy.in +++ b/dtrain/test/toy.in @@ -1,2 +1,2 @@ -0	ich sah ein kleines haus	i saw a little house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 -1	ich fand ein grosses haus	i found a large house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT_RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT_RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT_RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT_RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT_RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT_RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT_RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT_RULE__ [V] ||| fand ||| found ||| logp=0 +0	ich sah ein kleines haus	i saw a little house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT__RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT__RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT__RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT__RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT__RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT__RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT__RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT__RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT__RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT__RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT__RULE__ [V] ||| fand ||| found ||| logp=0 +1	ich fand ein grosses haus	i found a large house	[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 __NEXT__RULE__ [NP] ||| ich ||| i ||| logp=0 __NEXT__RULE__ [NP] ||| ein [NN,1] ||| a [1] ||| logp=0 __NEXT__RULE__ [NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 __NEXT__RULE__ [NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 __NEXT__RULE__ [JJ] ||| kleines ||| small ||| logp=0 __NEXT__RULE__ [JJ] ||| kleines ||| little ||| logp=0 __NEXT__RULE__ [JJ] ||| grosses ||| big ||| logp=0 __NEXT__RULE__ [JJ] ||| grosses ||| large ||| logp=0 __NEXT__RULE__ [VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 __NEXT__RULE__ [V] ||| sah ||| saw ||| logp=0 __NEXT__RULE__ [V] ||| fand ||| found ||| logp=0 diff --git a/dtrain/test/toy_cdec/cdec.ini b/dtrain/test/toy_cdec/cdec.ini new file mode 100644 index 00000000..3a6bab68 --- /dev/null +++ b/dtrain/test/toy_cdec/cdec.ini @@ -0,0 +1,3 @@ +formalism=scfg +grammar=../dtrain/test/toy_cdec/grammar +add_pass_through_rules=true diff --git a/dtrain/test/toy_cdec/grammar b/dtrain/test/toy_cdec/grammar new file mode 100644 index 00000000..aeed75ef --- /dev/null +++ b/dtrain/test/toy_cdec/grammar @@ -0,0 +1,12 @@ +[S] ||| [NP,1] [VP,2] ||| [1] [2] ||| logp=0 +[NP] ||| ich ||| i ||| logp=0 +[NP] ||| ein [NN,1] ||| a [1] ||| logp=0 +[NN] ||| [JJ,1] haus ||| [1] house ||| logp=0 use_house=1 +[NN] ||| [JJ,1] haus ||| [1] shell ||| logp=0 use_shell=1 +[JJ] ||| kleines ||| small ||| logp=0 +[JJ] ||| kleines ||| little ||| logp=0 +[JJ] ||| grosses ||| big ||| logp=0 +[JJ] ||| grosses ||| large ||| logp=0 +[VP] ||| [V,1] [NP,2] ||| [1] [2] ||| logp=0 +[V] ||| sah ||| saw ||| logp=0 +[V] ||| fand ||| found ||| logp=0 diff --git a/dtrain/test/toy_cdec/in b/dtrain/test/toy_cdec/in new file mode 100644 index 00000000..e6df9275 --- /dev/null +++ b/dtrain/test/toy_cdec/in @@ -0,0 +1 @@ +ich sah ein kleines haus diff --git a/dtrain/test/toy_cdec/weights b/dtrain/test/toy_cdec/weights new file mode 100644 index 00000000..10d7ed83 --- /dev/null +++ b/dtrain/test/toy_cdec/weights @@ -0,0 +1,2 @@ +logp 1 +use_shell 1 | 
