From 43e7ecdca09f4125346f64d45e44f440ac964421 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Sun, 25 Sep 2011 20:23:09 +0200
Subject: removed some quirks, less boost, prettier code, score_t
---
dtrain/test/cdec_toy/cdec.ini | 1 +
dtrain/test/example/cdec.ini | 7 ++++++
dtrain/test/example/dtrain.ini | 11 +++++++++
dtrain/test/example/nc-1k-tabs.gz | Bin 0 -> 21185883 bytes
dtrain/test/example/nc-1k.gz | Bin 0 -> 21474865 bytes
dtrain/test/example/nc-wmt11.en.srilm.gz | Bin 0 -> 16017291 bytes
dtrain/test/example/weights.gz | Bin 0 -> 255 bytes
dtrain/test/log_reg_dyer/bin_class.cc | 4 ++++
dtrain/test/log_reg_dyer/bin_class.h | 22 +++++++++++++++++
dtrain/test/log_reg_dyer/log_reg.cc | 39 +++++++++++++++++++++++++++++++
dtrain/test/log_reg_dyer/log_reg.h | 14 +++++++++++
dtrain/test/logreg/bin_class.cc | 4 ----
dtrain/test/logreg/bin_class.h | 22 -----------------
dtrain/test/logreg/log_reg.cc | 39 -------------------------------
dtrain/test/logreg/log_reg.h | 14 -----------
dtrain/test/toy_example/dtrain.ini | 2 +-
16 files changed, 99 insertions(+), 80 deletions(-)
create mode 100644 dtrain/test/example/cdec.ini
create mode 100644 dtrain/test/example/dtrain.ini
create mode 100644 dtrain/test/example/nc-1k-tabs.gz
create mode 100644 dtrain/test/example/nc-1k.gz
create mode 100644 dtrain/test/example/nc-wmt11.en.srilm.gz
create mode 100644 dtrain/test/example/weights.gz
create mode 100644 dtrain/test/log_reg_dyer/bin_class.cc
create mode 100644 dtrain/test/log_reg_dyer/bin_class.h
create mode 100644 dtrain/test/log_reg_dyer/log_reg.cc
create mode 100644 dtrain/test/log_reg_dyer/log_reg.h
delete mode 100644 dtrain/test/logreg/bin_class.cc
delete mode 100644 dtrain/test/logreg/bin_class.h
delete mode 100644 dtrain/test/logreg/log_reg.cc
delete mode 100644 dtrain/test/logreg/log_reg.h
(limited to 'dtrain/test')
diff --git a/dtrain/test/cdec_toy/cdec.ini b/dtrain/test/cdec_toy/cdec.ini
index 3a6bab68..9eb34512 100644
--- a/dtrain/test/cdec_toy/cdec.ini
+++ b/dtrain/test/cdec_toy/cdec.ini
@@ -1,3 +1,4 @@
formalism=scfg
grammar=../dtrain/test/toy_cdec/grammar
add_pass_through_rules=true
+weights=../dtrain/test/toy_cdec/weights
diff --git a/dtrain/test/example/cdec.ini b/dtrain/test/example/cdec.ini
new file mode 100644
index 00000000..cdc8a8bb
--- /dev/null
+++ b/dtrain/test/example/cdec.ini
@@ -0,0 +1,7 @@
+formalism=scfg
+add_pass_through_rules=true
+cubepruning_pop_limit=30
+scfg_max_span_limit=15
+feature_function=WordPenalty
+feature_function=KLanguageModel /home/pks/z/X/x/dtrain/test/example/nc-wmt11.en.srilm.gz
+#feature_function=RuleIdentityFeatures
diff --git a/dtrain/test/example/dtrain.ini b/dtrain/test/example/dtrain.ini
new file mode 100644
index 00000000..aee3c89e
--- /dev/null
+++ b/dtrain/test/example/dtrain.ini
@@ -0,0 +1,11 @@
+decoder_config=test/example/cdec.ini
+ksamples=100
+ngrams=3
+epochs=1000
+input=test/example/nc-1k.gz
+scorer=stupid_bleu
+output=test/example/weights.gz
+stop_after=10
+sample_from=kbest
+pair_sampling=all
+print_weights=Glue WordPenalty LanguageModel LanguageModel_OOV PhraseModel_0 PhraseModel_1 PhraseModel_2 PhraseModel_3 PhraseModel_4 PassThrough
diff --git a/dtrain/test/example/nc-1k-tabs.gz b/dtrain/test/example/nc-1k-tabs.gz
new file mode 100644
index 00000000..45496cd8
Binary files /dev/null and b/dtrain/test/example/nc-1k-tabs.gz differ
diff --git a/dtrain/test/example/nc-1k.gz b/dtrain/test/example/nc-1k.gz
new file mode 100644
index 00000000..f638a166
Binary files /dev/null and b/dtrain/test/example/nc-1k.gz differ
diff --git a/dtrain/test/example/nc-wmt11.en.srilm.gz b/dtrain/test/example/nc-wmt11.en.srilm.gz
new file mode 100644
index 00000000..7ce81057
Binary files /dev/null and b/dtrain/test/example/nc-wmt11.en.srilm.gz differ
diff --git a/dtrain/test/example/weights.gz b/dtrain/test/example/weights.gz
new file mode 100644
index 00000000..21157427
Binary files /dev/null and b/dtrain/test/example/weights.gz differ
diff --git a/dtrain/test/log_reg_dyer/bin_class.cc b/dtrain/test/log_reg_dyer/bin_class.cc
new file mode 100644
index 00000000..19bcde25
--- /dev/null
+++ b/dtrain/test/log_reg_dyer/bin_class.cc
@@ -0,0 +1,4 @@
+#include "bin_class.h"
+
+Objective::~Objective() {}
+
diff --git a/dtrain/test/log_reg_dyer/bin_class.h b/dtrain/test/log_reg_dyer/bin_class.h
new file mode 100644
index 00000000..3466109a
--- /dev/null
+++ b/dtrain/test/log_reg_dyer/bin_class.h
@@ -0,0 +1,22 @@
+#ifndef _BIN_CLASS_H_
+#define _BIN_CLASS_H_
+
+#include <vector>
+#include "sparse_vector.h"
+
+struct TrainingInstance {
+ // TODO add other info? loss for MIRA-type updates?
+ SparseVector<double> x_feature_map;
+ bool y;
+};
+
+struct Objective {
+ virtual ~Objective();
+
+ // returns f(x) and f'(x)
+ virtual double ObjectiveAndGradient(const SparseVector<double>& x,
+ const std::vector<TrainingInstance>& training_instances,
+ SparseVector<double>* g) const = 0;
+};
+
+#endif
diff --git a/dtrain/test/log_reg_dyer/log_reg.cc b/dtrain/test/log_reg_dyer/log_reg.cc
new file mode 100644
index 00000000..ec2331fe
--- /dev/null
+++ b/dtrain/test/log_reg_dyer/log_reg.cc
@@ -0,0 +1,39 @@
+#include "log_reg.h"
+
+#include <vector>
+#include <cmath>
+
+#include "sparse_vector.h"
+
+using namespace std;
+
+double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
+ const vector<TrainingInstance>& training_instances,
+ SparseVector<double>* g) const {
+ double cll = 0;
+ for (int i = 0; i < training_instances.size(); ++i) {
+ const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
+ double lp_false = dotprod;
+ double lp_true = -dotprod;
+ if (0 < lp_true) {
+ lp_true += log1p(exp(-lp_true));
+ lp_false = log1p(exp(lp_false));
+ } else {
+ lp_true = log1p(exp(lp_true));
+ lp_false += log1p(exp(-lp_false));
+ }
+ lp_true *= -1;
+ lp_false *= -1;
+ if (training_instances[i].y) { // true label
+ cll -= lp_true;
+ (*g) -= training_instances[i].x_feature_map * exp(lp_false);
+ // (*g)[0] -= exp(lp_false); // bias
+ } else { // false label
+ cll -= lp_false;
+ (*g) += training_instances[i].x_feature_map * exp(lp_true);
+ // g += corpus[i].second * exp(lp_true);
+ }
+ }
+ return cll;
+}
+
diff --git a/dtrain/test/log_reg_dyer/log_reg.h b/dtrain/test/log_reg_dyer/log_reg.h
new file mode 100644
index 00000000..ecc560b8
--- /dev/null
+++ b/dtrain/test/log_reg_dyer/log_reg.h
@@ -0,0 +1,14 @@
+#ifndef _LOG_REG_H_
+#define _LOG_REG_H_
+
+#include <vector>
+#include "sparse_vector.h"
+#include "bin_class.h"
+
+struct LogisticRegression : public Objective {
+ double ObjectiveAndGradient(const SparseVector<double>& x,
+ const std::vector<TrainingInstance>& training_instances,
+ SparseVector<double>* g) const;
+};
+
+#endif
diff --git a/dtrain/test/logreg/bin_class.cc b/dtrain/test/logreg/bin_class.cc
deleted file mode 100644
index 19bcde25..00000000
--- a/dtrain/test/logreg/bin_class.cc
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "bin_class.h"
-
-Objective::~Objective() {}
-
diff --git a/dtrain/test/logreg/bin_class.h b/dtrain/test/logreg/bin_class.h
deleted file mode 100644
index 3466109a..00000000
--- a/dtrain/test/logreg/bin_class.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef _BIN_CLASS_H_
-#define _BIN_CLASS_H_
-
-#include <vector>
-#include "sparse_vector.h"
-
-struct TrainingInstance {
- // TODO add other info? loss for MIRA-type updates?
- SparseVector<double> x_feature_map;
- bool y;
-};
-
-struct Objective {
- virtual ~Objective();
-
- // returns f(x) and f'(x)
- virtual double ObjectiveAndGradient(const SparseVector<double>& x,
- const std::vector<TrainingInstance>& training_instances,
- SparseVector<double>* g) const = 0;
-};
-
-#endif
diff --git a/dtrain/test/logreg/log_reg.cc b/dtrain/test/logreg/log_reg.cc
deleted file mode 100644
index ec2331fe..00000000
--- a/dtrain/test/logreg/log_reg.cc
+++ /dev/null
@@ -1,39 +0,0 @@
-#include "log_reg.h"
-
-#include <vector>
-#include <cmath>
-
-#include "sparse_vector.h"
-
-using namespace std;
-
-double LogisticRegression::ObjectiveAndGradient(const SparseVector<double>& x,
- const vector<TrainingInstance>& training_instances,
- SparseVector<double>* g) const {
- double cll = 0;
- for (int i = 0; i < training_instances.size(); ++i) {
- const double dotprod = training_instances[i].x_feature_map.dot(x); // TODO no bias, if bias, add x[0]
- double lp_false = dotprod;
- double lp_true = -dotprod;
- if (0 < lp_true) {
- lp_true += log1p(exp(-lp_true));
- lp_false = log1p(exp(lp_false));
- } else {
- lp_true = log1p(exp(lp_true));
- lp_false += log1p(exp(-lp_false));
- }
- lp_true *= -1;
- lp_false *= -1;
- if (training_instances[i].y) { // true label
- cll -= lp_true;
- (*g) -= training_instances[i].x_feature_map * exp(lp_false);
- // (*g)[0] -= exp(lp_false); // bias
- } else { // false label
- cll -= lp_false;
- (*g) += training_instances[i].x_feature_map * exp(lp_true);
- // g += corpus[i].second * exp(lp_true);
- }
- }
- return cll;
-}
-
diff --git a/dtrain/test/logreg/log_reg.h b/dtrain/test/logreg/log_reg.h
deleted file mode 100644
index ecc560b8..00000000
--- a/dtrain/test/logreg/log_reg.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LOG_REG_H_
-#define _LOG_REG_H_
-
-#include <vector>
-#include "sparse_vector.h"
-#include "bin_class.h"
-
-struct LogisticRegression : public Objective {
- double ObjectiveAndGradient(const SparseVector<double>& x,
- const std::vector<TrainingInstance>& training_instances,
- SparseVector<double>* g) const;
-};
-
-#endif
diff --git a/dtrain/test/toy_example/dtrain.ini b/dtrain/test/toy_example/dtrain.ini
index 0cc222e1..3ab4f8d4 100644
--- a/dtrain/test/toy_example/dtrain.ini
+++ b/dtrain/test/toy_example/dtrain.ini
@@ -1,5 +1,5 @@
decoder_config=test/toy_example/cdec.ini
-kbest=4
+ksamples=4
ngrams=3
epochs=2
input=test/toy_example/toy.in
--
cgit v1.2.3