From fe471bb707226052551d75b043295ca5f57261c0 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Sun, 25 Sep 2011 20:23:09 +0200 Subject: removed some quirks, less boost, prettier code, score_t --- dtrain/test/cdec_toy/cdec.ini | 1 + dtrain/test/example/cdec.ini | 7 ++++++ dtrain/test/example/dtrain.ini | 11 +++++++++ dtrain/test/example/nc-1k-tabs.gz | Bin 0 -> 21185883 bytes dtrain/test/example/nc-1k.gz | Bin 0 -> 21474865 bytes dtrain/test/example/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes dtrain/test/example/weights.gz | Bin 0 -> 255 bytes dtrain/test/log_reg_dyer/bin_class.cc | 4 ++++ dtrain/test/log_reg_dyer/bin_class.h | 22 +++++++++++++++++ dtrain/test/log_reg_dyer/log_reg.cc | 39 +++++++++++++++++++++++++++++++ dtrain/test/log_reg_dyer/log_reg.h | 14 +++++++++++ dtrain/test/logreg/bin_class.cc | 4 ---- dtrain/test/logreg/bin_class.h | 22 ----------------- dtrain/test/logreg/log_reg.cc | 39 ------------------------------- dtrain/test/logreg/log_reg.h | 14 ----------- dtrain/test/toy_example/dtrain.ini | 2 +- 16 files changed, 99 insertions(+), 80 deletions(-) create mode 100644 dtrain/test/example/cdec.ini create mode 100644 dtrain/test/example/dtrain.ini create mode 100644 dtrain/test/example/nc-1k-tabs.gz create mode 100644 dtrain/test/example/nc-1k.gz create mode 100644 dtrain/test/example/nc-wmt11.en.srilm.gz create mode 100644 dtrain/test/example/weights.gz create mode 100644 dtrain/test/log_reg_dyer/bin_class.cc create mode 100644 dtrain/test/log_reg_dyer/bin_class.h create mode 100644 dtrain/test/log_reg_dyer/log_reg.cc create mode 100644 dtrain/test/log_reg_dyer/log_reg.h delete mode 100644 dtrain/test/logreg/bin_class.cc delete mode 100644 dtrain/test/logreg/bin_class.h delete mode 100644 dtrain/test/logreg/log_reg.cc delete mode 100644 dtrain/test/logreg/log_reg.h (limited to 'dtrain/test') diff --git a/dtrain/test/cdec_toy/cdec.ini b/dtrain/test/cdec_toy/cdec.ini index 3a6bab68..9eb34512 100644 --- a/dtrain/test/cdec_toy/cdec.ini +++ 
b/dtrain/test/cdec_toy/cdec.ini @@ -1,3 +1,4 @@ formalism=scfg grammar=../dtrain/test/toy_cdec/grammar add_pass_through_rules=true +weights=../dtrain/test/toy_cdec/weights diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini new file mode 100644 index 00000000..cdc8a8bb --- /dev/null +++ b/dtrain/test/example/cdec.ini @@ -0,0 +1,7 @@ +formalism=scfg +add_pass_through_rules=true +cubepruning_pop_limit=30 +scfg_max_span_limit=15 +feature_function=WordPenalty +feature_function=KLanguageModel /home/pks/z/X/x/dtrain/test/example/nc-wmt11.en.srilm.gz +#feature_function=RuleIdentityFeatures diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini new file mode 100644 index 00000000..aee3c89e --- /dev/null +++ b/dtrain/test/example/dtrain.ini @@ -0,0 +1,11 @@ +decoder_config=test/example/cdec.ini +ksamples=100 +ngrams=3 +epochs=1000 +input=test/example/nc-1k.gz +scorer=stupid_bleu +output=test/example/weights.gz +stop_after=10 +sample_from=kbest +pair_sampling=all +print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough diff --git a/dtrain/test/example/nc-1k-tabs.gz b/dtrain/test/example/nc-1k-tabs.gz new file mode 100644 index 00000000..45496cd8 Binary files /dev/null and b/dtrain/test/example/nc-1k-tabs.gz differ diff --git a/dtrain/test/example/nc-1k.gz b/dtrain/test/example/nc-1k.gz new file mode 100644 index 00000000..f638a166 Binary files /dev/null and b/dtrain/test/example/nc-1k.gz differ diff --git a/dtrain/test/example/nc-wmt11.en.srilm.gz b/dtrain/test/example/nc-wmt11.en.srilm.gz new file mode 100644 index 00000000..7ce81057 Binary files /dev/null and b/dtrain/test/example/nc-wmt11.en.srilm.gz differ diff --git a/dtrain/test/example/weights.gz b/dtrain/test/example/weights.gz new file mode 100644 index 00000000..21157427 Binary files /dev/null and b/dtrain/test/example/weights.gz differ diff --git 
a/dtrain/test/log_reg_dyer/bin_class.cc b/dtrain/test/log_reg_dyer/bin_class.cc new file mode 100644 index 00000000..19bcde25 --- /dev/null +++ b/dtrain/test/log_reg_dyer/bin_class.cc @@ -0,0 +1,4 @@ +#include "bin_class.h" + +Objective::~Objective() {} + diff --git a/dtrain/test/log_reg_dyer/bin_class.h b/dtrain/test/log_reg_dyer/bin_class.h new file mode 100644 index 00000000..3466109a --- /dev/null +++ b/dtrain/test/log_reg_dyer/bin_class.h @@ -0,0 +1,22 @@ +#ifndef _BIN_CLASS_H_ +#define _BIN_CLASS_H_ + +#include <vector> +#include "sparse_vector.h" + +struct TrainingInstance { + // TODO add other info? loss for MIRA-type updates? + SparseVector<double> x_feature_map; + bool y; +}; + +struct Objective { + virtual ~Objective(); + + // returns f(x) and f'(x) + virtual double ObjectiveAndGradient(const SparseVector<double>& x, + const std::vector<TrainingInstance>& training_instances, + SparseVector<double>* g) const = 0; +}; + +#endif diff --git a/dtrain/test/log_reg_dyer/log_reg.cc b/dtrain/test/log_reg_dyer/log_reg.cc new file mode 100644 index 00000000..ec2331fe --- /dev/null +++ b/dtrain/test/log_reg_dyer/log_reg.cc @@ -0,0 +1,39 @@ +#include "log_reg.h" + +#include <vector> +#include <cmath> + +#include "sparse_vector.h" + +using namespace std; + +double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x, + const vector<TrainingInstance>& training_instances, + SparseVector<double>* g) const { + double cll = 0; + for (int i = 0; i < training_instances.size(); ++i) { + const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0] + double lp_false = dotprod; + double lp_true = -dotprod; + if (0 < lp_true) { + lp_true += log1p(exp(-lp_true)); + lp_false = log1p(exp(lp_false)); + } else { + lp_true = log1p(exp(lp_true)); + lp_false += log1p(exp(-lp_false)); + } + lp_true *= -1; + lp_false *= -1; + if (training_instances[i].y) { // true label + cll -= lp_true; + (*g) -= training_instances[i].x_feature_map * exp(lp_false); + // (*g)[0] -= exp(lp_false); // bias + } else { // false label + cll -= lp_false; 
+ (*g) += training_instances[i].x_feature_map * exp(lp_true); + // g += corpus[i].second * exp(lp_true); + } + } + return cll; +} + diff --git a/dtrain/test/log_reg_dyer/log_reg.h b/dtrain/test/log_reg_dyer/log_reg.h new file mode 100644 index 00000000..ecc560b8 --- /dev/null +++ b/dtrain/test/log_reg_dyer/log_reg.h @@ -0,0 +1,14 @@ +#ifndef _LOG_REG_H_ +#define _LOG_REG_H_ + +#include <vector> +#include "sparse_vector.h" +#include "bin_class.h" + +struct LogisticRegression : public Objective { + double ObjectiveAndGradient(const SparseVector<double>& x, + const std::vector<TrainingInstance>& training_instances, + SparseVector<double>* g) const; +}; + +#endif diff --git a/dtrain/test/logreg/bin_class.cc b/dtrain/test/logreg/bin_class.cc deleted file mode 100644 index 19bcde25..00000000 --- a/dtrain/test/logreg/bin_class.cc +++ /dev/null @@ -1,4 +0,0 @@ -#include "bin_class.h" - -Objective::~Objective() {} - diff --git a/dtrain/test/logreg/bin_class.h b/dtrain/test/logreg/bin_class.h deleted file mode 100644 index 3466109a..00000000 --- a/dtrain/test/logreg/bin_class.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _BIN_CLASS_H_ -#define _BIN_CLASS_H_ - -#include <vector> -#include "sparse_vector.h" - -struct TrainingInstance { - // TODO add other info? loss for MIRA-type updates? 
- SparseVector<double> x_feature_map; - bool y; -}; - -struct Objective { - virtual ~Objective(); - - // returns f(x) and f'(x) - virtual double ObjectiveAndGradient(const SparseVector<double>& x, - const std::vector<TrainingInstance>& training_instances, - SparseVector<double>* g) const = 0; -}; - -#endif diff --git a/dtrain/test/logreg/log_reg.cc b/dtrain/test/logreg/log_reg.cc deleted file mode 100644 index ec2331fe..00000000 --- a/dtrain/test/logreg/log_reg.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "log_reg.h" - -#include <vector> -#include <cmath> - -#include "sparse_vector.h" - -using namespace std; - -double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x, - const vector<TrainingInstance>& training_instances, - SparseVector<double>* g) const { - double cll = 0; - for (int i = 0; i < training_instances.size(); ++i) { - const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0] - double lp_false = dotprod; - double lp_true = -dotprod; - if (0 < lp_true) { - lp_true += log1p(exp(-lp_true)); - lp_false = log1p(exp(lp_false)); - } else { - lp_true = log1p(exp(lp_true)); - lp_false += log1p(exp(-lp_false)); - } - lp_true *= -1; - lp_false *= -1; - if (training_instances[i].y) { // true label - cll -= lp_true; - (*g) -= training_instances[i].x_feature_map * exp(lp_false); - // (*g)[0] -= exp(lp_false); // bias - } else { // false label - cll -= lp_false; - (*g) += training_instances[i].x_feature_map * exp(lp_true); - // g += corpus[i].second * exp(lp_true); - } - } - return cll; -} - diff --git a/dtrain/test/logreg/log_reg.h b/dtrain/test/logreg/log_reg.h deleted file mode 100644 index ecc560b8..00000000 --- a/dtrain/test/logreg/log_reg.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _LOG_REG_H_ -#define _LOG_REG_H_ - -#include <vector> -#include "sparse_vector.h" -#include "bin_class.h" - -struct LogisticRegression : public Objective { - double ObjectiveAndGradient(const SparseVector<double>& x, - const std::vector<TrainingInstance>& training_instances, - SparseVector<double>* g) const; -}; - -#endif diff --git 
a/dtrain/test/toy_example/dtrain.ini b/dtrain/test/toy_example/dtrain.ini index 0cc222e1..3ab4f8d4 100644 --- a/dtrain/test/toy_example/dtrain.ini +++ b/dtrain/test/toy_example/dtrain.ini @@ -1,5 +1,5 @@ decoder_config=test/toy_example/cdec.ini -kbest=4 +ksamples=4 ngrams=3 epochs=2 input=test/toy_example/toy.in -- cgit v1.2.3