From 574e2336348e5d3960b3232209d01845b40e6ea8 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Mon, 21 Nov 2011 12:21:08 +0100
Subject: added pro stuff,clean up
---
dtrain/test/example/cdec.ini | 5 +++--
dtrain/test/example/dtrain.ini | 24 +++++++++++----------
dtrain/test/example/nc-1k-tabs.gz | Bin 21185883 -> 0 bytes
dtrain/test/example/nc-1k.gz | Bin 21474865 -> 0 bytes
dtrain/test/example/nc-wmt11.1k.gz | Bin 0 -> 21185883 bytes
dtrain/test/log_reg_dyer/bin_class.cc | 4 ----
dtrain/test/log_reg_dyer/bin_class.h | 22 -------------------
dtrain/test/log_reg_dyer/log_reg.cc | 39 ----------------------------------
dtrain/test/log_reg_dyer/log_reg.h | 14 ------------
dtrain/test/logreg_cd/bin_class.cc | 4 ++++
dtrain/test/logreg_cd/bin_class.h | 22 +++++++++++++++++++
dtrain/test/logreg_cd/log_reg.cc | 39 ++++++++++++++++++++++++++++++++++
dtrain/test/logreg_cd/log_reg.h | 14 ++++++++++++
dtrain/test/toy/dtrain.ini | 4 ++--
14 files changed, 97 insertions(+), 94 deletions(-)
delete mode 100644 dtrain/test/example/nc-1k-tabs.gz
delete mode 100644 dtrain/test/example/nc-1k.gz
create mode 100644 dtrain/test/example/nc-wmt11.1k.gz
delete mode 100644 dtrain/test/log_reg_dyer/bin_class.cc
delete mode 100644 dtrain/test/log_reg_dyer/bin_class.h
delete mode 100644 dtrain/test/log_reg_dyer/log_reg.cc
delete mode 100644 dtrain/test/log_reg_dyer/log_reg.h
create mode 100644 dtrain/test/logreg_cd/bin_class.cc
create mode 100644 dtrain/test/logreg_cd/bin_class.h
create mode 100644 dtrain/test/logreg_cd/log_reg.cc
create mode 100644 dtrain/test/logreg_cd/log_reg.h
(limited to 'dtrain/test')
diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini
index 31a205c7..ff99de7b 100644
--- a/dtrain/test/example/cdec.ini
+++ b/dtrain/test/example/cdec.ini
@@ -1,7 +1,8 @@
formalism=scfg
add_pass_through_rules=true
-cubepruning_pop_limit=30
scfg_max_span_limit=15
+intersection_strategy=cube_pruning
+cubepruning_pop_limit=30
feature_function=WordPenalty
feature_function=KLanguageModel test/example/nc-wmt11.en.srilm.gz
-feature_function=RuleIdentityFeatures
+#feature_function=RuleIdentityFeatures
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
index 0b066013..fab4d317 100644
--- a/dtrain/test/example/dtrain.ini
+++ b/dtrain/test/example/dtrain.ini
@@ -1,18 +1,20 @@
decoder_config=test/example/cdec.ini
k=100
N=3
-gamma=0.001
-epochs=20
-input=test/example/nc-1k-tabs.gz
-scorer=smooth_bleu
-output=- #weights.gz
-stop_after=5
+learning_rate=0.0005
+gamma=0
+epochs=3
+input=test/example/nc-wmt11.1k.gz
+output=-
+scorer=stupid_bleu
sample_from=forest
-pair_sampling=108010
-select_weights=VOID
+#filter=unique
+pair_sampling=5050
+select_weights=last
print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
tmp=/tmp
-#unit_weight_vector=
-keep_w=true
+stop_after=10
+#keep_w=
+#update_ok=
#l1_reg=clip
-#l1_reg_strength=0.00001
+#l1_reg_strength=0.0001
diff --git a/dtrain/test/example/nc-1k-tabs.gz b/dtrain/test/example/nc-1k-tabs.gz
deleted file mode 100644
index 45496cd8..00000000
Binary files a/dtrain/test/example/nc-1k-tabs.gz and /dev/null differ
diff --git a/dtrain/test/example/nc-1k.gz b/dtrain/test/example/nc-1k.gz
deleted file mode 100644
index f638a166..00000000
Binary files a/dtrain/test/example/nc-1k.gz and /dev/null differ
diff --git a/dtrain/test/example/nc-wmt11.1k.gz b/dtrain/test/example/nc-wmt11.1k.gz
new file mode 100644
index 00000000..45496cd8
Binary files /dev/null and b/dtrain/test/example/nc-wmt11.1k.gz differ
diff --git a/dtrain/test/log_reg_dyer/bin_class.cc b/dtrain/test/log_reg_dyer/bin_class.cc
deleted file mode 100644
index 19bcde25..00000000
--- a/dtrain/test/log_reg_dyer/bin_class.cc
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "bin_class.h"
-
-Objective::~Objective() {}
-
diff --git a/dtrain/test/log_reg_dyer/bin_class.h b/dtrain/test/log_reg_dyer/bin_class.h
deleted file mode 100644
index 3466109a..00000000
--- a/dtrain/test/log_reg_dyer/bin_class.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _BIN_CLASS_H_
-#define _BIN_CLASS_H_
-
-#include <vector>
-#include "sparse_vector.h"
-
-struct TrainingInstance {
- // TODO add other info? loss for MIRA-type updates?
- SparseVector<double> x_feature_map;
- bool y;
-};
-
-struct Objective {
- virtual ~Objective();
-
- // returns f(x) and f'(x)
- virtual double ObjectiveAndGradient(const SparseVector<double>& x,
- const std::vector<TrainingInstance>& training_instances,
- SparseVector<double>* g) const = 0;
-};
-
-#endif
diff --git a/dtrain/test/log_reg_dyer/log_reg.cc b/dtrain/test/log_reg_dyer/log_reg.cc
deleted file mode 100644
index ec2331fe..00000000
--- a/dtrain/test/log_reg_dyer/log_reg.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "log_reg.h"
-
-#include <vector>
-#include <cmath>
-
-#include "sparse_vector.h"
-
-using namespace std;
-
-double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
- const vector<TrainingInstance>& training_instances,
- SparseVector<double>* g) const {
- double cll = 0;
- for (int i = 0; i < training_instances.size(); ++i) {
- const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
- double lp_false = dotprod;
- double lp_true = -dotprod;
- if (0 < lp_true) {
- lp_true += log1p(exp(-lp_true));
- lp_false = log1p(exp(lp_false));
- } else {
- lp_true = log1p(exp(lp_true));
- lp_false += log1p(exp(-lp_false));
- }
- lp_true *= -1;
- lp_false *= -1;
- if (training_instances[i].y) { // true label
- cll -= lp_true;
- (*g) -= training_instances[i].x_feature_map * exp(lp_false);
- // (*g)[0] -= exp(lp_false); // bias
- } else { // false label
- cll -= lp_false;
- (*g) += training_instances[i].x_feature_map * exp(lp_true);
- // g += corpus[i].second * exp(lp_true);
- }
- }
- return cll;
-}
-
diff --git a/dtrain/test/log_reg_dyer/log_reg.h b/dtrain/test/log_reg_dyer/log_reg.h
deleted file mode 100644
index ecc560b8..00000000
--- a/dtrain/test/log_reg_dyer/log_reg.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LOG_REG_H_
-#define _LOG_REG_H_
-
-#include <vector>
-#include "sparse_vector.h"
-#include "bin_class.h"
-
-struct LogisticRegression : public Objective {
- double ObjectiveAndGradient(const SparseVector<double>& x,
- const std::vector<TrainingInstance>& training_instances,
- SparseVector<double>* g) const;
-};
-
-#endif
diff --git a/dtrain/test/logreg_cd/bin_class.cc b/dtrain/test/logreg_cd/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}
+
diff --git a/dtrain/test/logreg_cd/bin_class.h b/dtrain/test/logreg_cd/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/logreg_cd/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef _BIN_CLASS_H_
+#define _BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {
+ // TODO add other info? loss for MIRA-type updates?
+ SparseVector<double> x_feature_map;
+ bool y;
+};
+
+struct Objective {
+ virtual ~Objective();
+
+ // returns f(x) and f'(x)
+ virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+ const std::vector<TrainingInstance>& training_instances,
+ SparseVector<double>* g) const = 0;
+};
+
+#endif
diff --git a/dtrain/test/logreg_cd/log_reg.cc b/dtrain/test/logreg_cd/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
+ const vector<TrainingInstance>& training_instances,
+ SparseVector<double>* g) const {
+ double cll = 0;
+ for (int i = 0; i < training_instances.size(); ++i) {
+ const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
+ double lp_false = dotprod;
+ double lp_true = -dotprod;
+ if (0 < lp_true) {
+ lp_true += log1p(exp(-lp_true));
+ lp_false = log1p(exp(lp_false));
+ } else {
+ lp_true = log1p(exp(lp_true));
+ lp_false += log1p(exp(-lp_false));
+ }
+ lp_true *= -1;
+ lp_false *= -1;
+ if (training_instances[i].y) { // true label
+ cll -= lp_true;
+ (*g) -= training_instances[i].x_feature_map * exp(lp_false);
+ // (*g)[0] -= exp(lp_false); // bias
+ } else { // false label
+ cll -= lp_false;
+ (*g) += training_instances[i].x_feature_map * exp(lp_true);
+ // g += corpus[i].second * exp(lp_true);
+ }
+ }
+ return cll;
+}
+
diff --git a/dtrain/test/logreg_cd/log_reg.h b/dtrain/test/logreg_cd/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/logreg_cd/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef _LOG_REG_H_
+#define _LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {
+ double ObjectiveAndGradient(const SparseVector<double>& x,
+ const std::vector<TrainingInstance>& training_instances,
+ SparseVector<double>* g) const;
+};
+
+#endif
diff --git a/dtrain/test/toy/dtrain.ini b/dtrain/test/toy/dtrain.ini
index 5bfa5b2d..105c07df 100644
--- a/dtrain/test/toy/dtrain.ini
+++ b/dtrain/test/toy/dtrain.ini
@@ -3,7 +3,7 @@ k=4
N=3
epochs=2
input=test/toy/in
-scorer=stupid_bleu
-sample_from=forest
output=-
+scorer=stupid_bleu
+sample_from=kbest
print_weights=logp use_shell use_house PassThrough
--
cgit v1.2.3