From 574e2336348e5d3960b3232209d01845b40e6ea8 Mon Sep 17 00:00:00 2001
From: Patrick Simianer <p@simianer.de>
Date: Mon, 21 Nov 2011 12:21:08 +0100
Subject: added pro stuff,clean up

---
 dtrain/test/example/cdec.ini          |   5 +++--
 dtrain/test/example/dtrain.ini        |  24 +++++++++++----------
 dtrain/test/example/nc-1k-tabs.gz     | Bin 21185883 -> 0 bytes
 dtrain/test/example/nc-1k.gz          | Bin 21474865 -> 0 bytes
 dtrain/test/example/nc-wmt11.1k.gz    | Bin 0 -> 21185883 bytes
 dtrain/test/log_reg_dyer/bin_class.cc |   4 ----
 dtrain/test/log_reg_dyer/bin_class.h  |  22 -------------------
 dtrain/test/log_reg_dyer/log_reg.cc   |  39 ----------------------------------
 dtrain/test/log_reg_dyer/log_reg.h    |  14 ------------
 dtrain/test/logreg_cd/bin_class.cc    |   4 ++++
 dtrain/test/logreg_cd/bin_class.h     |  22 +++++++++++++++++++
 dtrain/test/logreg_cd/log_reg.cc      |  39 ++++++++++++++++++++++++++++++++++
 dtrain/test/logreg_cd/log_reg.h       |  14 ++++++++++++
 dtrain/test/toy/dtrain.ini            |   4 ++--
 14 files changed, 97 insertions(+), 94 deletions(-)
 delete mode 100644 dtrain/test/example/nc-1k-tabs.gz
 delete mode 100644 dtrain/test/example/nc-1k.gz
 create mode 100644 dtrain/test/example/nc-wmt11.1k.gz
 delete mode 100644 dtrain/test/log_reg_dyer/bin_class.cc
 delete mode 100644 dtrain/test/log_reg_dyer/bin_class.h
 delete mode 100644 dtrain/test/log_reg_dyer/log_reg.cc
 delete mode 100644 dtrain/test/log_reg_dyer/log_reg.h
 create mode 100644 dtrain/test/logreg_cd/bin_class.cc
 create mode 100644 dtrain/test/logreg_cd/bin_class.h
 create mode 100644 dtrain/test/logreg_cd/log_reg.cc
 create mode 100644 dtrain/test/logreg_cd/log_reg.h

(limited to 'dtrain/test')

diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini
index 31a205c7..ff99de7b 100644
--- a/dtrain/test/example/cdec.ini
+++ b/dtrain/test/example/cdec.ini
@@ -1,7 +1,8 @@
 formalism=scfg
 add_pass_through_rules=true
-cubepruning_pop_limit=30
 scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=30
 feature_function=WordPenalty
 feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz
-feature_function=RuleIdentityFeatures
+#feature_function=RuleIdentityFeatures
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 0b066013..fab4d317 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,18 +1,20 @@
 decoder_config=test/example/cdec.ini
 k=100
 N=3
-gamma=0.001
-epochs=20
-input=test/example/nc-1k-tabs.gz
-scorer=smooth_bleu
-output=- #weights.gz
-stop_after=5
+learning_rate=0.0005
+gamma=0
+epochs=3
+input=test/example/nc-wmt11.1k.gz
+output=-
+scorer=stupid_bleu
 sample_from=forest
-pair_sampling=108010
-select_weights=VOID
+#filter=unique
+pair_sampling=5050
+select_weights=last
 print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
 tmp=/tmp
-#unit_weight_vector=
-keep_w=true
+stop_after=10
+#keep_w=
+#update_ok=
 #l1_reg=clip
-#l1_reg_strength=0.00001
+#l1_reg_strength=0.0001
diff --git a/dtrain/test/example/nc-1k-tabs.gz b/dtrain/test/example/nc-1k-tabs.gz
deleted file mode 100644
index 45496cd8..00000000
Binary files a/dtrain/test/example/nc-1k-tabs.gz and /dev/null differ
diff --git a/dtrain/test/example/nc-1k.gz b/dtrain/test/example/nc-1k.gz
deleted file mode 100644
index f638a166..00000000
Binary files a/dtrain/test/example/nc-1k.gz and /dev/null differ
diff --git a/dtrain/test/example/nc-wmt11.1k.gz b/dtrain/test/example/nc-wmt11.1k.gz
new file mode 100644
index 00000000..45496cd8
Binary files /dev/null and b/dtrain/test/example/nc-wmt11.1k.gz differ
diff --git a/dtrain/test/log_reg_dyer/bin_class.cc b/dtrain/test/log_reg_dyer/bin_class.cc
deleted file mode 100644
index 19bcde25..00000000
--- a/dtrain/test/log_reg_dyer/bin_class.cc
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "bin_class.h"
-
-Objective::~Objective() {}
-
diff --git a/dtrain/test/log_reg_dyer/bin_class.h b/dtrain/test/log_reg_dyer/bin_class.h
deleted file mode 100644
index 3466109a..00000000
--- a/dtrain/test/log_reg_dyer/bin_class.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _BIN_CLASS_H_
-#define _BIN_CLASS_H_
-
-#include <vector>
-#include "sparse_vector.h"
-
-struct TrainingInstance {
-  // TODO add other info? loss for MIRA-type updates?
-  SparseVector<double> x_feature_map;
-  bool y;
-};
-
-struct Objective {
-  virtual ~Objective();
-
-  // returns f(x) and f'(x)
-  virtual double ObjectiveAndGradient(const SparseVector<double>& x,
-                  const std::vector<TrainingInstance>& training_instances,
-                  SparseVector<double>* g) const = 0;
-};
-
-#endif
diff --git a/dtrain/test/log_reg_dyer/log_reg.cc b/dtrain/test/log_reg_dyer/log_reg.cc
deleted file mode 100644
index ec2331fe..00000000
--- a/dtrain/test/log_reg_dyer/log_reg.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "log_reg.h"
-
-#include <vector>
-#include <cmath>
-
-#include "sparse_vector.h"
-
-using namespace std;
-
-double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
-                              const vector<TrainingInstance>& training_instances,
-                              SparseVector<double>* g) const {
-  double cll = 0;
-  for (int i = 0; i < training_instances.size(); ++i) {
-    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
-    double lp_false = dotprod;
-    double lp_true = -dotprod;
-    if (0 < lp_true) {
-      lp_true += log1p(exp(-lp_true));
-      lp_false = log1p(exp(lp_false));
-    } else {
-      lp_true = log1p(exp(lp_true));
-      lp_false += log1p(exp(-lp_false));
-    }
-    lp_true *= -1;
-    lp_false *= -1;
-    if (training_instances[i].y) {  // true label
-      cll -= lp_true;
-      (*g) -= training_instances[i].x_feature_map * exp(lp_false);
-      // (*g)[0] -= exp(lp_false); // bias
-    } else {                  // false label
-      cll -= lp_false;
-      (*g) += training_instances[i].x_feature_map * exp(lp_true);
-      // g += corpus[i].second * exp(lp_true);
-    }
-  }
-  return cll;
-}
-
diff --git a/dtrain/test/log_reg_dyer/log_reg.h b/dtrain/test/log_reg_dyer/log_reg.h
deleted file mode 100644
index ecc560b8..00000000
--- a/dtrain/test/log_reg_dyer/log_reg.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LOG_REG_H_
-#define _LOG_REG_H_
-
-#include <vector>
-#include "sparse_vector.h"
-#include "bin_class.h"
-
-struct LogisticRegression : public Objective {
-  double ObjectiveAndGradient(const SparseVector<double>& x,
-                              const std::vector<TrainingInstance>& training_instances,
-                              SparseVector<double>* g) const;
-};
-
-#endif
diff --git a/dtrain/test/logreg_cd/bin_class.cc b/dtrain/test/logreg_cd/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}  // out-of-line definition of the destructor declared virtual in bin_class.h
+
diff --git a/dtrain/test/logreg_cd/bin_class.h b/dtrain/test/logreg_cd/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef _BIN_CLASS_H_
+#define _BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {  // one labeled example: a sparse feature vector plus a boolean label
+  // TODO add other info? loss for MIRA-type updates?
+  SparseVector<double> x_feature_map;  // log_reg.cc takes its dot product with the argument x
+  bool y;  // label; log_reg.cc handles true as the "true label" case, false as the "false label" case
+};
+
+struct Objective {  // abstract interface for an objective evaluated over a set of training instances
+  virtual ~Objective();  // defined out of line in bin_class.cc
+
+  // returns f(x); f'(x) comes back through g (the implementation in log_reg.cc adds it into *g)
+  virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+                  const std::vector<TrainingInstance>& training_instances,
+                  SparseVector<double>* g) const = 0;
+};
+
+#endif
diff --git a/dtrain/test/logreg_cd/log_reg.cc b/dtrain/test/logreg_cd/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
+                              const vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const {  // returns the summed negative log-likelihood; its gradient is added into *g (never reset here)
+  double cll = 0;  // running sum of -log p(label) over all instances
+  for (int i = 0; i < training_instances.size(); ++i) {  // NOTE(review): signed int compared against unsigned size(); consider size_t
+    const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
+    double lp_false = dotprod;
+    double lp_true = -dotprod;
+    if (0 < lp_true) {  // both branches yield the same two values; each keeps every exp() argument <= 0
+      lp_true += log1p(exp(-lp_true));  // = log(1 + exp(-dotprod))
+      lp_false = log1p(exp(lp_false));  // = log(1 + exp(dotprod))
+    } else {
+      lp_true = log1p(exp(lp_true));  // = log(1 + exp(-dotprod))
+      lp_false += log1p(exp(-lp_false));  // = log(1 + exp(dotprod))
+    }
+    lp_true *= -1;  // now -log(1 + exp(-dotprod)): log of the logistic sigmoid at dotprod, i.e. log p(true)
+    lp_false *= -1;  // now -log(1 + exp(dotprod)): log p(false) = log(1 - p(true))
+    if (training_instances[i].y) {  // true label
+      cll -= lp_true;
+      (*g) -= training_instances[i].x_feature_map * exp(lp_false);  // subtracts p(false) * features
+      // (*g)[0] -= exp(lp_false); // bias
+    } else {                  // false label
+      cll -= lp_false;
+      (*g) += training_instances[i].x_feature_map * exp(lp_true);  // adds p(true) * features
+      // g += corpus[i].second * exp(lp_true);
+    }
+  }
+  return cll;
+}
+
diff --git a/dtrain/test/logreg_cd/log_reg.h b/dtrain/test/logreg_cd/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef _LOG_REG_H_
+#define _LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {  // Objective implementation; the method body lives in log_reg.cc
+  double ObjectiveAndGradient(const SparseVector<double>& x,
+                              const std::vector<TrainingInstance>& training_instances,
+                              SparseVector<double>* g) const;  // implements the pure virtual declared in Objective
+};
+
+#endif
diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini
index 5bfa5b2d..105c07df 100644
--- a/dtrain/test/toy/dtrain.ini
+++ b/dtrain/test/toy/dtrain.ini
@@ -3,7 +3,7 @@ k=4
 N=3
 epochs=2
 input=test/toy/in
-scorer=stupid_bleu
-sample_from=forest
 output=-
+scorer=stupid_bleu
+sample_from=kbest
 print_weights=logp use_shell use_house PassThrough
-- 
cgit v1.2.3