summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--training/dtrain/Makefile.am2
-rw-r--r--training/dtrain/dtrain_net_interface.cc124
-rw-r--r--training/dtrain/dtrain_net_interface.h4
-rw-r--r--training/dtrain/sample_net_interface.h2
-rw-r--r--training/dtrain/score.h6
-rw-r--r--training/dtrain/score_net_interface.h2
6 files changed, 113 insertions, 27 deletions
diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am
index ef256ffe..a0b5545b 100644
--- a/training/dtrain/Makefile.am
+++ b/training/dtrain/Makefile.am
@@ -9,5 +9,5 @@ dtrain_net_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../..
dtrain_net_interface_SOURCES = dtrain_net_interface.cc dtrain_net_interface.h dtrain.h sample_net_interface.h score_net_interface.h update.h
dtrain_net_interface_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a /fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib/libnanomsg.so
-AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include
+AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include -I/fast_scratch/simianer/lfpe/cppnanomsg
diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc
index 265bc014..e9612def 100644
--- a/training/dtrain/dtrain_net_interface.cc
+++ b/training/dtrain/dtrain_net_interface.cc
@@ -1,6 +1,6 @@
-#include "dtrain_net.h"
-#include "sample_net.h"
-#include "score.h"
+#include "dtrain_net_interface.h"
+#include "sample_net_interface.h"
+#include "score_net_interface.h"
#include "update.h"
#include <nanomsg/nn.h>
@@ -18,9 +18,11 @@ main(int argc, char** argv)
exit(1); // something is wrong
const size_t k = conf["k"].as<size_t>();
const size_t N = conf["N"].as<size_t>();
+ weight_t eta = conf["learning_rate"].as<weight_t>();
const weight_t margin = conf["margin"].as<weight_t>();
const string master_addr = conf["addr"].as<string>();
const string output_fn = conf["output"].as<string>();
+ const string debug_fn = conf["debug_output"].as<string>();
// setup decoder
register_feature_functions();
@@ -39,7 +41,7 @@ main(int argc, char** argv)
cerr << _p4;
// output configuration
- cerr << "dtrain_net" << endl << "Parameters:" << endl;
+ cerr << "dtrain_net_interface" << endl << "Parameters:" << endl;
cerr << setw(25) << "k " << k << endl;
cerr << setw(25) << "N " << N << endl;
cerr << setw(25) << "margin " << margin << endl;
@@ -53,9 +55,18 @@ main(int argc, char** argv)
string hello = "hello";
sock.send(hello.c_str(), hello.size()+1, 0);
+ // debug
+ ostringstream debug_output;
+
size_t i = 0;
while(true)
{
+ // debug --
+ debug_output.str(string());
+ debug_output.clear();
+ debug_output << "{" << endl;
+ // -- debug
+
char *buf = NULL;
string source;
vector<Ngrams> refs;
@@ -65,29 +76,33 @@ main(int argc, char** argv)
if (buf) {
const string in(buf, buf+sz);
nn::freemsg(buf);
- cerr << "got input '" << in << "'" << endl;
+ cerr << "[dtrain] got input '" << in << "'" << endl;
if (in == "shutdown") { // shut down
- cerr << "got shutdown signal" << endl;
+ cerr << "[dtrain] got shutdown signal" << endl;
next = false;
} else { // translate
vector<string> parts;
boost::algorithm::split_regex(parts, in, boost::regex(" \\|\\|\\| "));
if (parts[0] == "act:translate") {
- cerr << "translating ..." << endl;
+ cerr << "[dtrain] translating ..." << endl;
lambdas.init_vector(&decoder_weights);
observer->dont_score = true;
decoder.Decode(parts[1], observer);
observer->dont_score = false;
vector<ScoredHyp>* samples = observer->GetSamples();
ostringstream os;
- cerr << "1best features " << (*samples)[0].f << endl;
+ cerr << "[dtrain] 1best features " << (*samples)[0].f << endl;
PrintWordIDVec((*samples)[0].w, os);
sock.send(os.str().c_str(), os.str().size()+1, 0);
- cerr << "> done translating, looping" << endl;
+ cerr << "[dtrain] done translating, looping again" << endl;
continue;
} else { // learn
- cerr << "learning ..." << endl;
+ cerr << "[dtrain] learning ..." << endl;
source = parts[0];
+ // debug --
+ debug_output << "\"source\":\"" << source.substr(source.find_first_of(">")+1, source.find_last_of("<")-3) << "\"," << endl;
+ debug_output << "\"target\":\"" << parts[1] << "\"," << endl;
+ // -- debug
parts.erase(parts.begin());
for (auto s: parts) {
vector<WordID> r;
@@ -110,24 +125,95 @@ main(int argc, char** argv)
observer->SetReference(refs, rsz);
decoder.Decode(source, observer);
vector<ScoredHyp>* samples = observer->GetSamples();
- cerr << "samples size " << samples->size() << endl;
+
+ // debug --
+ debug_output << "\"1best\":\"";
+ PrintWordIDVec((*samples)[0].w, debug_output);
+ debug_output << "\"," << endl;
+ debug_output << "\"kbest\":[" << endl;
+ size_t h = 0;
+ for (auto s: *samples) {
+ debug_output << "\"" << s.gold << " ||| " << s.model << " ||| " << s.rank << " ||| ";
+ debug_output << "EgivenFCoherent=" << s.f[FD::Convert("EgivenFCoherent")] << " ";
+ debug_output << "SampleCountF=" << s.f[FD::Convert("CountEF")] << " ";
+ debug_output << "MaxLexFgivenE=" << s.f[FD::Convert("MaxLexFgivenE")] << " ";
+ debug_output << "MaxLexEgivenF=" << s.f[FD::Convert("MaxLexEgivenF")] << " ";
+ debug_output << "IsSingletonF=" << s.f[FD::Convert("IsSingletonF")] << " ";
+ debug_output << "IsSingletonFE=" << s.f[FD::Convert("IsSingletonFE")] << " ";
+      debug_output << "Glue=" << s.f[FD::Convert("Glue")] << " ";
+ debug_output << "WordPenalty=" << s.f[FD::Convert("WordPenalty")] << " ";
+ debug_output << "PassThrough=" << s.f[FD::Convert("PassThrough")] << " ";
+ debug_output << "LanguageModel=" << s.f[FD::Convert("LanguageModel_OOV")];
+ debug_output << " ||| ";
+ PrintWordIDVec(s.w, debug_output);
+ h += 1;
+ debug_output << "\"";
+ if (h < samples->size()) {
+ debug_output << ",";
+ }
+ debug_output << endl;
+ }
+ debug_output << "]," << endl;
+ debug_output << "\"samples_size\":" << samples->size() << "," << endl;
+ debug_output << "\"weights_before\":{" << endl;
+ debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl;
+ debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl;
+ debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl;
+ debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl;
+ debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl;
+ debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl;
+ debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl;
+ debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl;
+ debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl;
+ debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl;
+ debug_output << "}," << endl;
+ // -- debug
// get pairs and update
SparseVector<weight_t> updates;
- CollectUpdates(samples, updates, margin);
- cerr << "updates size " << updates.size() << endl;
- cerr << "lambdas before " << lambdas << endl;
- lambdas.plus_eq_v_times_s(updates, 1.0); // FIXME: learning rate?
- cerr << "lambdas after " << lambdas << endl;
+ size_t num_up = CollectUpdates(samples, updates, margin);
+
+ // debug --
+ debug_output << "\"num_up\":" << num_up << "," << endl;
+ debug_output << "\"updated_features\":" << updates.size() << "," << endl;
+ debug_output << "\"learning_rate\":" << eta << "," << endl;
+ debug_output << "\"best_match\":\"";
+ PrintWordIDVec((*samples)[0].w, debug_output);
+ debug_output << "\"," << endl;
+ debug_output << "\"best_match_score\":" << (*samples)[0].gold << "," << endl ;
+ // -- debug
+
+ lambdas.plus_eq_v_times_s(updates, eta);
i++;
- cerr << "> done learning, looping" << endl;
+ // debug --
+ debug_output << "\"weights_after\":{" << endl;
+ debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl;
+ debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl;
+ debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl;
+ debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl;
+ debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl;
+ debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl;
+ debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl;
+ debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl;
+ debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl;
+ debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl;
+ debug_output << "}" << endl;
+ debug_output << "}" << endl;
+ // -- debug
+
+ cerr << "[dtrain] done learning, looping again" << endl;
string done = "done";
sock.send(done.c_str(), done.size()+1, 0);
+
+ // debug --
+ WriteFile f(debug_fn);
+ *f << debug_output.str();
+ // -- debug
} // input loop
if (output_fn != "") {
- cerr << "writing final weights to '" << output_fn << "'" << endl;
+ cerr << "[dtrain] writing final weights to '" << output_fn << "'" << endl;
lambdas.init_vector(decoder_weights);
Weights::WriteToFile(output_fn, decoder_weights, true);
}
@@ -135,7 +221,7 @@ main(int argc, char** argv)
string shutdown = "off";
sock.send(shutdown.c_str(), shutdown.size()+1, 0);
- cerr << "shutting down, goodbye" << endl;
+ cerr << "[dtrain] shutting down, goodbye" << endl;
return 0;
}
diff --git a/training/dtrain/dtrain_net_interface.h b/training/dtrain/dtrain_net_interface.h
index 1c724b55..2c539930 100644
--- a/training/dtrain/dtrain_net_interface.h
+++ b/training/dtrain/dtrain_net_interface.h
@@ -1,5 +1,5 @@
-#ifndef _DTRAIN_NET_H_
-#define _DTRAIN_NET_H_
+#ifndef _DTRAIN_NET_INTERFACE_H_
+#define _DTRAIN_NET_INTERFACE_H_
#include "dtrain.h"
diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h
index 497149d9..98b10c82 100644
--- a/training/dtrain/sample_net_interface.h
+++ b/training/dtrain/sample_net_interface.h
@@ -3,7 +3,7 @@
#include "kbest.h"
-#include "score.h"
+#include "score_net_interface.h"
namespace dtrain
{
diff --git a/training/dtrain/score.h b/training/dtrain/score.h
index d909dbf3..e6e60acb 100644
--- a/training/dtrain/score.h
+++ b/training/dtrain/score.h
@@ -1,5 +1,5 @@
-#ifndef _DTRAIN_SCORE_NET_INTERFACE_H_
-#define _DTRAIN_SCORE_NET_INTERFACE_H_
+#ifndef _DTRAIN_SCORE_H_
+#define _DTRAIN_SCORE_H_
#include "dtrain.h"
@@ -153,7 +153,7 @@ struct PerSentenceBleuScorer
size_t best = numeric_limits<size_t>::max();
for (auto l: ref_ls) {
size_t d = abs(hl-l);
- if (d < best) {
+ if (d < best) {
best_idx = i;
best = d;
}
diff --git a/training/dtrain/score_net_interface.h b/training/dtrain/score_net_interface.h
index 6e359249..58357cf6 100644
--- a/training/dtrain/score_net_interface.h
+++ b/training/dtrain/score_net_interface.h
@@ -153,7 +153,7 @@ struct PerSentenceBleuScorer
size_t best = numeric_limits<size_t>::max();
for (auto l: ref_ls) {
size_t d = abs(hl-l);
- if (d < best) {
+ if (d < best) {
best_idx = i;
best = d;
}