From 2f1e5c6106995993c7211c7871126421e60d4909 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 23 Jun 2015 17:16:34 +0200
Subject: sample_net_interface.h
---
training/dtrain/sample_net_interface.h | 61 ++++++++++++++++++++++++++++++++++
1 file changed, 61 insertions(+)
create mode 100644 training/dtrain/sample_net_interface.h
(limited to 'training/dtrain/sample_net_interface.h')
diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h
new file mode 100644
index 00000000..497149d9
--- /dev/null
+++ b/training/dtrain/sample_net_interface.h
@@ -0,0 +1,61 @@
+#ifndef _DTRAIN_SAMPLE_NET_H_
+#define _DTRAIN_SAMPLE_NET_H_
+
+#include "kbest.h"
+
+#include "score.h"
+
+namespace dtrain
+{
+
+struct ScoredKbest : public DecoderObserver
+{
+ const size_t k_;
+ size_t feature_count_, effective_sz_;
+ vector<ScoredHyp> samples_;
+ PerSentenceBleuScorer* scorer_;
+ vector<Ngrams>* ref_ngs_;
+ vector<size_t>* ref_ls_;
+ bool dont_score;
+
+ ScoredKbest(const size_t k, PerSentenceBleuScorer* scorer) :
+ k_(k), scorer_(scorer), dont_score(false) {}
+
+ virtual void
+ NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
+ {
+ samples_.clear(); effective_sz_ = feature_count_ = 0;
+ KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,
+ KBest::FilterUnique, prob_t, EdgeProb> kbest(*hg, k_);
+ for (size_t i = 0; i < k_; ++i) {
+ const KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,
+ KBest::FilterUnique, prob_t, EdgeProb>::Derivation* d =
+ kbest.LazyKthBest(hg->nodes_.size() - 1, i);
+ if (!d) break;
+ ScoredHyp h;
+ h.w = d->yield;
+ h.f = d->feature_values;
+ h.model = log(d->score);
+ h.rank = i;
+ if (!dont_score)
+ h.gold = scorer_->Score(h.w, *ref_ngs_, *ref_ls_);
+ samples_.push_back(h);
+ effective_sz_++;
+ feature_count_ += h.f.size();
+ }
+ }
+
+ vector<ScoredHyp>* GetSamples() { return &samples_; }
+ inline void SetReference(vector<Ngrams>& ngs, vector<size_t>& ls)
+ {
+ ref_ngs_ = &ngs;
+ ref_ls_ = &ls;
+ }
+ inline size_t GetFeatureCount() { return feature_count_; }
+ inline size_t GetSize() { return effective_sz_; }
+};
+
+} // namespace
+
+#endif
+
--
cgit v1.2.3
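The ScoredKbest observer added above is the glue between cdec's decoder and the dtrain update loop: for each decoded sentence it walks the hypergraph's unique k-best derivations, records yield, feature vector, model score and rank, and, unless dont_score is set, attaches a per-sentence BLEU score computed against the references installed via SetReference(). A minimal driver sketch of that wiring (the main loop of dtrain_net_interface.cc in the next patch does the same; Decoder, register_feature_functions() and PerSentenceBleuScorer come from the cdec tree, and "cdec.ini" is a placeholder config path):

    #include <fstream>
    #include "decoder.h"
    #include "ff_register.h"
    #include "sample_net_interface.h"

    using namespace dtrain;

    int main()
    {
      register_feature_functions();        // cdec's built-in feature functions
      std::ifstream ini("cdec.ini");       // placeholder decoder configuration
      Decoder decoder(&ini);
      PerSentenceBleuScorer scorer(4);     // N=4 BLEU approximation
      ScoredKbest observer(100, &scorer);  // k=100 unique derivations
      vector<Ngrams> ref_ngs;              // reference n-grams, built elsewhere
      vector<size_t> ref_ls;               // reference lengths
      observer.SetReference(ref_ngs, ref_ls);
      decoder.Decode("ein beispiel", &observer);
      vector<ScoredHyp>* samples = observer.GetSamples();
      // (*samples)[0] is the model-best hypothesis; samples->size() <= k
      return 0;
    }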
From 723cbf8d543ba8be2880b497518c87fc1cbf3573 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Wed, 24 Jun 2015 18:51:21 +0200
Subject: dtrain_net_interface: cleanup
---
training/dtrain/Makefile.am | 2 +-
training/dtrain/dtrain_net_interface.cc | 124 +++++++++++++++++++++++++++-----
training/dtrain/dtrain_net_interface.h | 4 +-
training/dtrain/sample_net_interface.h | 2 +-
training/dtrain/score.h | 6 +-
training/dtrain/score_net_interface.h | 2 +-
6 files changed, 113 insertions(+), 27 deletions(-)
(limited to 'training/dtrain/sample_net_interface.h')
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am
index ef256ffe..a0b5545b 100644
--- a/training/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -9,5 +9,5 @@ dtrain_net_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../..
dtrain_net_interface_SOURCES = dtrain_net_interface.cc dtrain_net_interface.h dtrain.h sample_net_interface.h score_net_interface.h update.h
dtrain_net_interface_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a /fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib/libnanomsg.so
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include -I/fast_scratch/simianer/lfpe/cppnanomsg
diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc
index 265bc014..e9612def 100644
--- a/training/dtrain/dtrain_net_interface.cc
+++ b/training/dtrain/dtrain_net_interface.cc
@@ -1,6 +1,6 @@
-#include "dtrain_net.h"
-#include "sample_net.h"
-#include "score.h"
+#include "dtrain_net_interface.h"
+#include "sample_net_interface.h"
+#include "score_net_interface.h"
#include "update.h"
#include <nn.hpp>
@@ -18,9 +18,11 @@ main(int argc, char** argv)
exit(1); // something is wrong
const size_t k = conf["k"].as<size_t>();
const size_t N = conf["N"].as<size_t>();
+ weight_t eta = conf["learning_rate"].as<weight_t>();
const weight_t margin = conf["margin"].as<weight_t>();
const string master_addr = conf["addr"].as<string>();
const string output_fn = conf["output"].as<string>();
+ const string debug_fn = conf["debug_output"].as<string>();
// setup decoder
register_feature_functions();
@@ -39,7 +41,7 @@ main(int argc, char** argv)
cerr << _p4;
// output configuration
- cerr << "dtrain_net" << endl << "Parameters:" << endl;
+ cerr << "dtrain_net_interface" << endl << "Parameters:" << endl;
cerr << setw(25) << "k " << k << endl;
cerr << setw(25) << "N " << N << endl;
cerr << setw(25) << "margin " << margin << endl;
@@ -53,9 +55,18 @@ main(int argc, char** argv)
string hello = "hello";
sock.send(hello.c_str(), hello.size()+1, 0);
+ // debug
+ ostringstream debug_output;
+
size_t i = 0;
while(true)
{
+ // debug --
+ debug_output.str(string());
+ debug_output.clear();
+ debug_output << "{" << endl;
+ // -- debug
+
char *buf = NULL;
string source;
vector<Ngrams> refs;
@@ -65,29 +76,33 @@ main(int argc, char** argv)
if (buf) {
const string in(buf, buf+sz);
nn::freemsg(buf);
- cerr << "got input '" << in << "'" << endl;
+ cerr << "[dtrain] got input '" << in << "'" << endl;
if (in == "shutdown") { // shut down
- cerr << "got shutdown signal" << endl;
+ cerr << "[dtrain] got shutdown signal" << endl;
next = false;
} else { // translate
vector<string> parts;
boost::algorithm::split_regex(parts, in, boost::regex(" \\|\\|\\| "));
if (parts[0] == "act:translate") {
- cerr << "translating ..." << endl;
+ cerr << "[dtrain] translating ..." << endl;
lambdas.init_vector(&decoder_weights);
observer->dont_score = true;
decoder.Decode(parts[1], observer);
observer->dont_score = false;
vector<ScoredHyp>* samples = observer->GetSamples();
ostringstream os;
- cerr << "1best features " << (*samples)[0].f << endl;
+ cerr << "[dtrain] 1best features " << (*samples)[0].f << endl;
PrintWordIDVec((*samples)[0].w, os);
sock.send(os.str().c_str(), os.str().size()+1, 0);
- cerr << "> done translating, looping" << endl;
+ cerr << "[dtrain] done translating, looping again" << endl;
continue;
} else { // learn
- cerr << "learning ..." << endl;
+ cerr << "[dtrain] learning ..." << endl;
source = parts[0];
+ // debug --
+ debug_output << "\"source\":\"" << source.substr(source.find_first_of(">")+1, source.find_last_of("<")-3) << "\"," << endl;
+ debug_output << "\"target\":\"" << parts[1] << "\"," << endl;
+ // -- debug
parts.erase(parts.begin());
for (auto s: parts) {
vector<WordID> r;
@@ -110,24 +125,95 @@ main(int argc, char** argv)
observer->SetReference(refs, rsz);
decoder.Decode(source, observer);
vector<ScoredHyp>* samples = observer->GetSamples();
- cerr << "samples size " << samples->size() << endl;
+
+ // debug --
+ debug_output << "\"1best\":\"";
+ PrintWordIDVec((*samples)[0].w, debug_output);
+ debug_output << "\"," << endl;
+ debug_output << "\"kbest\":[" << endl;
+ size_t h = 0;
+ for (auto s: *samples) {
+ debug_output << "\"" << s.gold << " ||| " << s.model << " ||| " << s.rank << " ||| ";
+ debug_output << "EgivenFCoherent=" << s.f[FD::Convert("EgivenFCoherent")] << " ";
+ debug_output << "SampleCountF=" << s.f[FD::Convert("CountEF")] << " ";
+ debug_output << "MaxLexFgivenE=" << s.f[FD::Convert("MaxLexFgivenE")] << " ";
+ debug_output << "MaxLexEgivenF=" << s.f[FD::Convert("MaxLexEgivenF")] << " ";
+ debug_output << "IsSingletonF=" << s.f[FD::Convert("IsSingletonF")] << " ";
+ debug_output << "IsSingletonFE=" << s.f[FD::Convert("IsSingletonFE")] << " ";
+ debug_output << "Glue=:" << s.f[FD::Convert("Glue")] << " ";
+ debug_output << "WordPenalty=" << s.f[FD::Convert("WordPenalty")] << " ";
+ debug_output << "PassThrough=" << s.f[FD::Convert("PassThrough")] << " ";
+ debug_output << "LanguageModel=" << s.f[FD::Convert("LanguageModel_OOV")];
+ debug_output << " ||| ";
+ PrintWordIDVec(s.w, debug_output);
+ h += 1;
+ debug_output << "\"";
+ if (h < samples->size()) {
+ debug_output << ",";
+ }
+ debug_output << endl;
+ }
+ debug_output << "]," << endl;
+ debug_output << "\"samples_size\":" << samples->size() << "," << endl;
+ debug_output << "\"weights_before\":{" << endl;
+ debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl;
+ debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl;
+ debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl;
+ debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl;
+ debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl;
+ debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl;
+ debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl;
+ debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl;
+ debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl;
+ debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl;
+ debug_output << "}," << endl;
+ // -- debug
// get pairs and update
SparseVector<weight_t> updates;
- CollectUpdates(samples, updates, margin);
- cerr << "updates size " << updates.size() << endl;
- cerr << "lambdas before " << lambdas << endl;
- lambdas.plus_eq_v_times_s(updates, 1.0); // FIXME: learning rate?
- cerr << "lambdas after " << lambdas << endl;
+ size_t num_up = CollectUpdates(samples, updates, margin);
+
+ // debug --
+ debug_output << "\"num_up\":" << num_up << "," << endl;
+ debug_output << "\"updated_features\":" << updates.size() << "," << endl;
+ debug_output << "\"learning_rate\":" << eta << "," << endl;
+ debug_output << "\"best_match\":\"";
+ PrintWordIDVec((*samples)[0].w, debug_output);
+ debug_output << "\"," << endl;
+ debug_output << "\"best_match_score\":" << (*samples)[0].gold << "," << endl ;
+ // -- debug
+
+ lambdas.plus_eq_v_times_s(updates, eta);
i++;
- cerr << "> done learning, looping" << endl;
+ // debug --
+ debug_output << "\"weights_after\":{" << endl;
+ debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl;
+ debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl;
+ debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl;
+ debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl;
+ debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl;
+ debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl;
+ debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl;
+ debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl;
+ debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl;
+ debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl;
+ debug_output << "}" << endl;
+ debug_output << "}" << endl;
+ // -- debug
+
+ cerr << "[dtrain] done learning, looping again" << endl;
string done = "done";
sock.send(done.c_str(), done.size()+1, 0);
+
+ // debug --
+ WriteFile f(debug_fn);
+ *f << debug_output.str();
+ // -- debug
} // input loop
if (output_fn != "") {
- cerr << "writing final weights to '" << output_fn << "'" << endl;
+ cerr << "[dtrain] writing final weights to '" << output_fn << "'" << endl;
lambdas.init_vector(decoder_weights);
Weights::WriteToFile(output_fn, decoder_weights, true);
}
@@ -135,7 +221,7 @@ main(int argc, char** argv)
string shutdown = "off";
sock.send(shutdown.c_str(), shutdown.size()+1, 0);
- cerr << "shutting down, goodbye" << endl;
+ cerr << "[dtrain] shutting down, goodbye" << endl;
return 0;
}
diff --git a/training/dtrain/dtrain_net_interface.h b/training/dtrain/dtrain_net_interface.h
index 1c724b55..2c539930 100644
--- a/training/dtrain/dtrain_net_interface.h
+++ b/training/dtrain/dtrain_net_interface.h
@@ -1,5 +1,5 @@
-#ifndef _DTRAIN_NET_H_
-#define _DTRAIN_NET_H_
+#ifndef _DTRAIN_NET_INTERFACE_H_
+#define _DTRAIN_NET_INTERFACE_H_
#include "dtrain.h"
diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h
index 497149d9..98b10c82 100644
--- a/training/dtrain/sample_net_interface.h
+++ b/training/dtrain/sample_net_interface.h
@@ -3,7 +3,7 @@
#include "kbest.h"
-#include "score.h"
+#include "score_net_interface.h"
namespace dtrain
{
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index d909dbf3..e6e60acb 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -1,5 +1,5 @@
-#ifndef _DTRAIN_SCORE_NET_INTERFACE_H_
-#define _DTRAIN_SCORE_NET_INTERFACE_H_
+#ifndef _DTRAIN_SCORE_H_
+#define _DTRAIN_SCORE_H_
#include "dtrain.h"
@@ -153,7 +153,7 @@ struct PerSentenceBleuScorer
size_t best = numeric_limits<size_t>::max();
for (auto l: ref_ls) {
size_t d = abs(hl-l);
- if (d < best) {
+ if (d < best) {
best_idx = i;
best = d;
}
diff --git a/training/dtrain/score_net_interface.h b/training/dtrain/score_net_interface.h
index 6e359249..58357cf6 100644
--- a/training/dtrain/score_net_interface.h
+++ b/training/dtrain/score_net_interface.h
@@ -153,7 +153,7 @@ struct PerSentenceBleuScorer
size_t best = numeric_limits<size_t>::max();
for (auto l: ref_ls) {
size_t d = abs(hl-l);
- if (d < best) {
+ if (d < best) {
best_idx = i;
best = d;
}
--
cgit v1.2.3
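The debug blocks threaded through the learn branch above piece together one brace-delimited, JSON-like record per processed sentence and write it to the file given by --debug_output. Reconstructed from the statements above, a record looks roughly like this (all values are placeholders):

    {
    "source":"ein beispiel",
    "target":"an example",
    "1best":"an example",
    "kbest":[
    "0.4073 ||| -1.532 ||| 0 ||| EgivenFCoherent=0.302 ... ||| an example",
    ...
    ],
    "samples_size":100,
    "weights_before":{
    "EgivenFCoherent":-0.1,
    ...
    },
    "num_up":23,
    "updated_features":11,
    "learning_rate":1,
    "best_match":"an example",
    "best_match_score":0.4073,
    "weights_after":{
    ...
    }
    }

Since the WriteFile on the debug path is re-opened in every iteration, the file holds only the record of the most recent update, not a log of all of them.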
From 96379c9adef0a1c5b970e7765369e85833514405 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Fri, 26 Jun 2015 15:24:42 +0200
Subject: refactoring, more features (resetting, setting learning rate(s))
---
training/dtrain/dtrain_net_interface.cc | 94 ++++++++++++++++++---------------
training/dtrain/dtrain_net_interface.h | 36 ++++++++++---
training/dtrain/sample_net_interface.h | 2 +-
3 files changed, 81 insertions(+), 51 deletions(-)
(limited to 'training/dtrain/sample_net_interface.h')
diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc
index e9612def..3b19ecbf 100644
--- a/training/dtrain/dtrain_net_interface.cc
+++ b/training/dtrain/dtrain_net_interface.cc
@@ -19,10 +19,14 @@ main(int argc, char** argv)
const size_t k = conf["k"].as<size_t>();
const size_t N = conf["N"].as<size_t>();
weight_t eta = conf["learning_rate"].as<weight_t>();
+ weight_t eta_sparse = conf["learning_rate_sparse"].as<weight_t>();
const weight_t margin = conf["margin"].as<weight_t>();
const string master_addr = conf["addr"].as<string>();
const string output_fn = conf["output"].as<string>();
const string debug_fn = conf["debug_output"].as<string>();
+ vector<string> dense_features;
+ boost::split(dense_features, conf["dense_features"].as<string>(),
+ boost::is_any_of(" "));
// setup decoder
register_feature_functions();
@@ -33,10 +37,11 @@ main(int argc, char** argv)
// weights
vector<weight_t>& decoder_weights = decoder.CurrentWeightVector();
- SparseVector<weight_t> lambdas, w_average;
+ SparseVector<weight_t> lambdas, w_average, original_lambdas;
if (conf.count("input_weights")) {
Weights::InitFromFile(conf["input_weights"].as<string>(), &decoder_weights);
Weights::InitSparseVector(decoder_weights, &lambdas);
+ Weights::InitSparseVector(decoder_weights, &original_lambdas);
}
cerr << _p4;
@@ -44,6 +49,8 @@ main(int argc, char** argv)
cerr << "dtrain_net_interface" << endl << "Parameters:" << endl;
cerr << setw(25) << "k " << k << endl;
cerr << setw(25) << "N " << N << endl;
+ cerr << setw(25) << "eta " << eta << endl;
+ cerr << setw(25) << "eta (sparse) " << eta_sparse << endl;
cerr << setw(25) << "margin " << margin << endl;
cerr << setw(25) << "decoder conf " << "'"
<< conf["decoder_conf"].as<string>() << "'" << endl;
@@ -58,13 +65,15 @@ main(int argc, char** argv)
// debug
ostringstream debug_output;
+ string done = "done";
+
size_t i = 0;
while(true)
{
// debug --
debug_output.str(string());
debug_output.clear();
- debug_output << "{" << endl;
+ debug_output << "{" << endl; // hack us a nice JSON output
// -- debug
char *buf = NULL;
@@ -77,7 +86,31 @@ main(int argc, char** argv)
const string in(buf, buf+sz);
nn::freemsg(buf);
cerr << "[dtrain] got input '" << in << "'" << endl;
- if (in == "shutdown") { // shut down
+ if (boost::starts_with(in, "set_learning_rate")) { // set learning rate
+ stringstream ss(in);
+ string x; weight_t w;
+ ss >> x; ss >> w;
+ cerr << "[dtrain] setting (dense) learning rate to " << w << " (was: " << eta << ")" << endl;
+ eta = w;
+ cerr << "[dtrain] done, looping again" << endl;
+ sock.send(done.c_str(), done.size()+1, 0);
+ continue;
+ } else if (boost::starts_with(in, "set_sparse_learning_rate")) { // set sparse learning rate
+ stringstream ss(in);
+ string x; weight_t w;
+ ss >> x; ss >> w;
+ cerr << "[dtrain] setting sparse learning rate to " << w << " (was: " << eta_sparse << ")" << endl;
+ eta_sparse = w;
+ cerr << "[dtrain] done, looping again" << endl;
+ sock.send(done.c_str(), done.size()+1, 0);
+ continue;
+ } else if (boost::starts_with(in, "reset_weights")) { // reset weights
+ cerr << "[dtrain] resetting weights" << endl;
+ lambdas = original_lambdas;
+ cerr << "[dtrain] done, looping again" << endl;
+ sock.send(done.c_str(), done.size()+1, 0);
+ continue;
+ } else if (in == "shutdown") { // shut down
cerr << "[dtrain] got shutdown signal" << endl;
next = false;
} else { // translate
@@ -134,16 +167,8 @@ main(int argc, char** argv)
size_t h = 0;
for (auto s: *samples) {
debug_output << "\"" << s.gold << " ||| " << s.model << " ||| " << s.rank << " ||| ";
- debug_output << "EgivenFCoherent=" << s.f[FD::Convert("EgivenFCoherent")] << " ";
- debug_output << "SampleCountF=" << s.f[FD::Convert("CountEF")] << " ";
- debug_output << "MaxLexFgivenE=" << s.f[FD::Convert("MaxLexFgivenE")] << " ";
- debug_output << "MaxLexEgivenF=" << s.f[FD::Convert("MaxLexEgivenF")] << " ";
- debug_output << "IsSingletonF=" << s.f[FD::Convert("IsSingletonF")] << " ";
- debug_output << "IsSingletonFE=" << s.f[FD::Convert("IsSingletonFE")] << " ";
- debug_output << "Glue=:" << s.f[FD::Convert("Glue")] << " ";
- debug_output << "WordPenalty=" << s.f[FD::Convert("WordPenalty")] << " ";
- debug_output << "PassThrough=" << s.f[FD::Convert("PassThrough")] << " ";
- debug_output << "LanguageModel=" << s.f[FD::Convert("LanguageModel_OOV")];
+ for (auto o: s.f)
+ debug_output << FD::Convert(o.first) << "=" << o.second << " ";
debug_output << " ||| ";
PrintWordIDVec(s.w, debug_output);
h += 1;
@@ -156,67 +181,52 @@ main(int argc, char** argv)
debug_output << "]," << endl;
debug_output << "\"samples_size\":" << samples->size() << "," << endl;
debug_output << "\"weights_before\":{" << endl;
- debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl;
- debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl;
- debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl;
- debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl;
- debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl;
- debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl;
- debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl;
- debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl;
- debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl;
- debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl;
+ weightsToJson(lambdas, debug_output);
debug_output << "}," << endl;
// -- debug
// get pairs and update
SparseVector<weight_t> updates;
size_t num_up = CollectUpdates(samples, updates, margin);
-
+ updates *= eta_sparse; // apply learning rate for sparse features
+ for (auto feat: dense_features) { // apply learning rate for dense features
+ updates[FD::Convert(feat)] /= eta_sparse;
+ updates[FD::Convert(feat)] *= eta;
+ }
// debug --
debug_output << "\"num_up\":" << num_up << "," << endl;
debug_output << "\"updated_features\":" << updates.size() << "," << endl;
debug_output << "\"learning_rate\":" << eta << "," << endl;
+ debug_output << "\"learning_rate_sparse\":" << eta_sparse << "," << endl;
debug_output << "\"best_match\":\"";
PrintWordIDVec((*samples)[0].w, debug_output);
debug_output << "\"," << endl;
debug_output << "\"best_match_score\":" << (*samples)[0].gold << "," << endl ;
// -- debug
-
- lambdas.plus_eq_v_times_s(updates, eta);
+ lambdas.plus_eq_v_times_s(updates, 1.0);
i++;
// debug --
debug_output << "\"weights_after\":{" << endl;
- debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl;
- debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl;
- debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl;
- debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl;
- debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl;
- debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl;
- debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl;
- debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl;
- debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl;
- debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl;
+ weightsToJson(lambdas, debug_output);
debug_output << "}" << endl;
debug_output << "}" << endl;
// -- debug
cerr << "[dtrain] done learning, looping again" << endl;
- string done = "done";
sock.send(done.c_str(), done.size()+1, 0);
// debug --
WriteFile f(debug_fn);
*f << debug_output.str();
// -- debug
- } // input loop
- if (output_fn != "") {
- cerr << "[dtrain] writing final weights to '" << output_fn << "'" << endl;
+ // write current weights
lambdas.init_vector(decoder_weights);
- Weights::WriteToFile(output_fn, decoder_weights, true);
- }
+ ostringstream fn;
+ fn << output_fn << "." << i << ".gz";
+ Weights::WriteToFile(fn.str(), decoder_weights, true);
+ } // input loop
string shutdown = "off";
sock.send(shutdown.c_str(), shutdown.size()+1, 0);
diff --git a/training/dtrain/dtrain_net_interface.h b/training/dtrain/dtrain_net_interface.h
index 2c539930..e603a87f 100644
--- a/training/dtrain/dtrain_net_interface.h
+++ b/training/dtrain/dtrain_net_interface.h
@@ -6,6 +6,23 @@
namespace dtrain
{
+inline void
+weightsToJson(SparseVector<weight_t>& w, ostringstream& os)
+{
+ vector<string> strs;
+ for (typename SparseVector<weight_t>::iterator it=w.begin(),e=w.end(); it!=e; ++it) {
+ ostringstream a;
+ a << "\"" << FD::Convert(it->first) << "\":" << it->second;
+ strs.push_back(a.str());
+ }
+ for (vector<string>::const_iterator it=strs.begin(); it!=strs.end(); it++) {
+ os << *it;
+ if ((it+1) != strs.end())
+ os << ",";
+ os << endl;
+ }
+}
+
template <typename T>
inline void
vectorAsString(SparseVector<T>& v, ostringstream& os)
@@ -39,14 +56,17 @@ dtrain_net_init(int argc, char** argv, po::variables_map* conf)
{
po::options_description ini("Configuration File Options");
ini.add_options()
- ("decoder_conf,C", po::value(), "configuration file for decoder")
- ("k", po::value()->default_value(100), "size of kbest list")
- ("N", po::value()->default_value(4), "N for BLEU approximation")
- ("margin,m", po::value()->default_value(0.), "margin for margin perceptron")
- ("output,o", po::value()->default_value(""), "final weights file")
- ("input_weights,w", po::value(), "input weights file")
- ("learning_rate,l", po::value()->default_value(1.0), "learning rate")
- ("debug_output,d", po::value()->default_value(""), "file for debug output");
+ ("decoder_conf,C", po::value(), "configuration file for decoder")
+ ("k", po::value()->default_value(100), "size of kbest list")
+ ("N", po::value()->default_value(4), "N for BLEU approximation")
+ ("margin,m", po::value()->default_value(0.), "margin for margin perceptron")
+ ("output,o", po::value()->default_value(""), "final weights file")
+ ("input_weights,w", po::value(), "input weights file")
+ ("learning_rate,l", po::value()->default_value(1.0), "learning rate")
+ ("learning_rate_sparse,l", po::value()->default_value(1.0), "learning rate for sparse features")
+ ("dense_features,D", po::value()->default_value("EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV"),
+ "dense features")
+ ("debug_output,d", po::value()->default_value(""), "file for debug output");
po::options_description cl("Command Line Options");
cl.add_options()
("conf,c", po::value(), "dtrain configuration file")
diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h
index 98b10c82..affcd0d6 100644
--- a/training/dtrain/sample_net_interface.h
+++ b/training/dtrain/sample_net_interface.h
@@ -22,7 +22,7 @@ struct ScoredKbest : public DecoderObserver
k_(k), scorer_(scorer), dont_score(false) {}
virtual void
- NotifyTranslationForest(const SentenceMetadata& smeta, Hypergraph* hg)
+ NotifyTranslationForest(const SentenceMetadata& /*smeta*/, Hypergraph* hg)
{
samples_.clear(); effective_sz_ = feature_count_ = 0;
KBest::KBestDerivations<vector<WordID>, ESentenceTraversal,
--
cgit v1.2.3
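The two learning rates introduced above are applied with a scale-then-rescale trick: the whole update vector is first multiplied by eta_sparse, then every feature named in --dense_features is divided back and multiplied by eta instead. A self-contained sketch of that arithmetic (std::map stands in for SparseVector<weight_t> and feature names for FD ids; as in the original, indexing a dense feature that received no update inserts a zero entry, and the rescaling assumes eta_sparse != 0):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    int main()
    {
      double eta = 0.5, eta_sparse = 0.01;
      std::vector<std::string> dense_features = {"Glue", "WordPenalty"};
      std::map<std::string, double> updates =
        {{"Glue", 1.0}, {"WordPenalty", -2.0}, {"R:ein_example", 1.0}};
      for (auto& u : updates)
        u.second *= eta_sparse;        // sparse rate for everything ...
      for (auto& feat : dense_features) {
        updates[feat] /= eta_sparse;   // ... then undo it for the dense features
        updates[feat] *= eta;          // and apply the dense rate instead
      }
      for (auto& u : updates)          // Glue 0.5, R:ein_example 0.01, WordPenalty -1
        std::printf("%s %g\n", u.first.c_str(), u.second);
      return 0;
    }

With both rates folded into the update vector, the perceptron step itself degenerates to a plain addition, which is why plus_eq_v_times_s() is now called with 1.0 instead of eta.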
From 5cc7ce74d79bb6b2f7d04305cef47374d857b603 Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Tue, 20 Oct 2015 13:50:29 +0200
Subject: small fix
---
training/dtrain/dtrain_net_interface.cc | 2 +-
training/dtrain/sample_net_interface.h | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
(limited to 'training/dtrain/sample_net_interface.h')
diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc
index e21920d0..6d24bb7b 100644
--- a/training/dtrain/dtrain_net_interface.cc
+++ b/training/dtrain/dtrain_net_interface.cc
@@ -127,7 +127,7 @@ main(int argc, char** argv)
ostringstream os;
cerr << "[dtrain] 1best features " << (*samples)[0].f << endl;
if (output_derivation) {
- os << observer->GetViterbiTreeString() << endl;
+ os << observer->GetViterbiTreeStr() << endl;
} else {
PrintWordIDVec((*samples)[0].w, os);
}
diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h
index affcd0d6..a2b5f87d 100644
--- a/training/dtrain/sample_net_interface.h
+++ b/training/dtrain/sample_net_interface.h
@@ -17,6 +17,7 @@ struct ScoredKbest : public DecoderObserver
vector<Ngrams>* ref_ngs_;
vector<size_t>* ref_ls_;
bool dont_score;
+ string viterbiTreeStr_;
ScoredKbest(const size_t k, PerSentenceBleuScorer* scorer) :
k_(k), scorer_(scorer), dont_score(false) {}
@@ -42,6 +43,7 @@ struct ScoredKbest : public DecoderObserver
samples_.push_back(h);
effective_sz_++;
feature_count_ += h.f.size();
+ viterbiTreeStr_ = hg->show_viterbi_tree(false);
}
}
@@ -53,6 +55,7 @@ struct ScoredKbest : public DecoderObserver
}
inline size_t GetFeatureCount() { return feature_count_; }
inline size_t GetSize() { return effective_sz_; }
+ inline string GetViterbiTreeStr() { return viterbiTreeStr_; }
};
} // namespace
--
cgit v1.2.3
From a046645ca3e2ac1ac8839ba2856c49bd771be62f Mon Sep 17 00:00:00 2001
From: Patrick Simianer
Date: Thu, 5 Nov 2015 20:17:40 +0100
Subject: dtrain_net_interface output rules too
---
training/dtrain/dtrain_net_interface.cc | 4 ++++
training/dtrain/dtrain_net_interface.h | 1 +
training/dtrain/sample_net_interface.h | 6 +++++-
3 files changed, 10 insertions(+), 1 deletion(-)
(limited to 'training/dtrain/sample_net_interface.h')
diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc
index 01b110b4..f16b9304 100644
--- a/training/dtrain/dtrain_net_interface.cc
+++ b/training/dtrain/dtrain_net_interface.cc
@@ -28,6 +28,7 @@ main(int argc, char** argv)
boost::split(dense_features, conf["dense_features"].as<string>(),
boost::is_any_of(" "));
const bool output_derivation = conf["output_derivation"].as<bool>();
+ const bool output_rules = conf["output_rules"].as<bool>();
// setup decoder
register_feature_functions();
@@ -132,6 +133,9 @@ main(int argc, char** argv)
} else {
PrintWordIDVec((*samples)[0].w, os);
}
+ if (output_rules) {
+ os << observer->GetViterbiRules() << endl;
+ }
sock.send(os.str().c_str(), os.str().size()+1, 0);
cerr << "[dtrain] done translating, looping again" << endl;
continue;
diff --git a/training/dtrain/dtrain_net_interface.h b/training/dtrain/dtrain_net_interface.h
index 3c7665a2..816237c3 100644
--- a/training/dtrain/dtrain_net_interface.h
+++ b/training/dtrain/dtrain_net_interface.h
@@ -65,6 +65,7 @@ dtrain_net_init(int argc, char** argv, po::variables_map* conf)
("learning_rate,l", po::value()->default_value(1.0), "learning rate")
("learning_rate_sparse,l", po::value()->default_value(1.0), "learning rate for sparse features")
("output_derivation,E", po::bool_switch()->default_value(false), "output derivation, not viterbi str")
+ ("output_rules,R", po::bool_switch()->default_value(false), "also output rules")
("dense_features,D", po::value()->default_value("EgivenFCoherent SampleCountF CountEF MaxLexFgivenE MaxLexEgivenF IsSingletonF IsSingletonFE Glue WordPenalty PassThrough LanguageModel LanguageModel_OOV Shape_S01111_T11011 Shape_S11110_T11011 Shape_S11100_T11000 Shape_S01110_T01110 Shape_S01111_T01111 Shape_S01100_T11000 Shape_S10000_T10000 Shape_S11100_T11100 Shape_S11110_T11110 Shape_S11110_T11010 Shape_S01100_T11100 Shape_S01000_T01000 Shape_S01010_T01010 Shape_S01111_T01011 Shape_S01100_T01100 Shape_S01110_T11010 Shape_S11000_T11000 Shape_S11000_T01100 IsSupportedOnline ForceRule"),
"dense features")
("debug_output,d", po::value()->default_value(""), "file for debug output");
diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h
index a2b5f87d..6d00e5d5 100644
--- a/training/dtrain/sample_net_interface.h
+++ b/training/dtrain/sample_net_interface.h
@@ -17,7 +17,7 @@ struct ScoredKbest : public DecoderObserver
vector<Ngrams>* ref_ngs_;
vector<size_t>* ref_ls_;
bool dont_score;
- string viterbiTreeStr_;
+ string viterbiTreeStr_, viterbiRules_;
ScoredKbest(const size_t k, PerSentenceBleuScorer* scorer) :
k_(k), scorer_(scorer), dont_score(false) {}
@@ -44,6 +44,9 @@ struct ScoredKbest : public DecoderObserver
effective_sz_++;
feature_count_ += h.f.size();
viterbiTreeStr_ = hg->show_viterbi_tree(false);
+ ostringstream ss;
+ ViterbiRules(*hg, &ss);
+ viterbiRules_ = ss.str();
}
}
@@ -56,6 +59,7 @@ struct ScoredKbest : public DecoderObserver
inline size_t GetFeatureCount() { return feature_count_; }
inline size_t GetSize() { return effective_sz_; }
inline string GetViterbiTreeStr() { return viterbiTreeStr_; }
+ inline string GetViterbiRules() { return viterbiRules_; }
};
} // namespace
--
cgit v1.2.3
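Taken together, these patches define a small request/reply protocol over a nanomsg pair socket: the trainer sends "hello" once at startup, replies "done" to set_learning_rate, set_sparse_learning_rate, reset_weights and learning requests, replies with the translation (the derivation tree instead with -E, plus the grammar rules with -R) to "act:translate ||| source", and replies "off" to "shutdown". A master-side sketch against the cppnanomsg binding the Makefile links (address and payloads are placeholders, it assumes the trainer connects to the master's address, and error handling is omitted):

    #include <nn.hpp>
    #include <nanomsg/pair.h>
    #include <iostream>
    #include <string>

    static std::string req(nn::socket& sock, const std::string& msg)
    {
      sock.send(msg.c_str(), msg.size()+1, 0); // NUL-terminated, like the trainer's replies
      char* buf = NULL;
      int sz = sock.recv(&buf, NN_MSG, 0);     // zero-copy receive
      std::string reply(buf, buf+sz);
      nn::freemsg(buf);
      return reply;
    }

    int main()
    {
      nn::socket sock(AF_SP, NN_PAIR);
      sock.bind("tcp://*:60666");              // placeholder master address
      char* buf = NULL;
      sock.recv(&buf, NN_MSG, 0);              // trainer's "hello"
      nn::freemsg(buf);
      std::cout << req(sock, "set_learning_rate 0.001") << std::endl;     // "done"
      std::cout << req(sock, "act:translate ||| ein beispiel") << std::endl;
      std::cout << req(sock, "ein beispiel ||| an example") << std::endl; // learn, "done"
      std::cout << req(sock, "shutdown") << std::endl;                    // "off"
      return 0;
    }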