From f9cac877cd3fd1af5d28c0368c8f3ec7d19da103 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 23 Jun 2015 17:11:00 +0200 Subject: vanilla per-sentence bleu --- training/dtrain/score.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'training/dtrain/score.h') diff --git a/training/dtrain/score.h b/training/dtrain/score.h index 06dbc5a4..b0b73e94 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -189,7 +189,8 @@ struct PerSentenceBleuScorer / ((counts.sum_[i] + add))); } - return BrevityPenalty(hl, rl+1) * exp(sum); + //return BrevityPenalty(hl, rl+1) * exp(sum); + return BrevityPenalty(hl, rl) * exp(sum); } }; -- cgit v1.2.3 From febf178153138a536ba80ee7e5a1ebc61fbaea3b Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Tue, 23 Jun 2015 17:15:51 +0200 Subject: vanilla per-sentence bleu --- training/dtrain/score.h | 3 +- training/dtrain/score_net_interface.h | 200 ++++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 2 deletions(-) create mode 100644 training/dtrain/score_net_interface.h (limited to 'training/dtrain/score.h') diff --git a/training/dtrain/score.h b/training/dtrain/score.h index b0b73e94..06dbc5a4 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -189,8 +189,7 @@ struct PerSentenceBleuScorer / ((counts.sum_[i] + add))); } - //return BrevityPenalty(hl, rl+1) * exp(sum); - return BrevityPenalty(hl, rl) * exp(sum); + return BrevityPenalty(hl, rl+1) * exp(sum); } }; diff --git a/training/dtrain/score_net_interface.h b/training/dtrain/score_net_interface.h new file mode 100644 index 00000000..6e359249 --- /dev/null +++ b/training/dtrain/score_net_interface.h @@ -0,0 +1,200 @@ +#ifndef _DTRAIN_SCORE_NET_INTERFACE_H_ +#define _DTRAIN_SCORE_NET_INTERFACE_H_ + +#include "dtrain.h" + +namespace dtrain +{ + +struct NgramCounts +{ + size_t N_; + map clipped_; + map sum_; + + NgramCounts(const size_t N) : N_(N) { Zero(); } + + inline void + operator+=(const NgramCounts& rhs) + { + if (rhs.N_ > N_) Resize(rhs.N_); + for (size_t i = 0; i < N_; i++) { + this->clipped_[i] += rhs.clipped_.find(i)->second; + this->sum_[i] += rhs.sum_.find(i)->second; + } + } + + inline const NgramCounts + operator+(const NgramCounts &other) const + { + NgramCounts result = *this; + result += other; + + return result; + } + + inline void + Add(const size_t count, const size_t ref_count, const size_t i) + { + assert(i < N_); + if (count > ref_count) { + clipped_[i] += ref_count; + } else { + clipped_[i] += count; + } + sum_[i] += count; + } + + inline void + Zero() + { + for (size_t i = 0; i < N_; i++) { + clipped_[i] = 0.; + sum_[i] = 0.; + } + } + + inline void + Resize(size_t N) + { + if (N == N_) return; + else if (N > N_) { + for (size_t i = N_; i < N; i++) { + clipped_[i] = 0.; + sum_[i] = 0.; + } + } else { // N < N_ + for (size_t i = N_-1; i > N-1; i--) { + clipped_.erase(i); + sum_.erase(i); + } + } + N_ = N; + } +}; + +typedef map, size_t> Ngrams; + +inline Ngrams +MakeNgrams(const vector& s, const size_t N) +{ + Ngrams ngrams; + vector ng; + for (size_t i = 0; i < s.size(); i++) { + ng.clear(); + for (size_t j = i; j < min(i+N, s.size()); j++) { + ng.push_back(s[j]); + ngrams[ng]++; + } + } + + return ngrams; +} + +inline NgramCounts +MakeNgramCounts(const vector& hyp, + const vector& ref, + const size_t N) +{ + Ngrams hyp_ngrams = MakeNgrams(hyp, N); + NgramCounts counts(N); + Ngrams::iterator it, ti; + for (it = hyp_ngrams.begin(); it != hyp_ngrams.end(); it++) { + size_t max_ref_count = 0; + for (auto r: ref) { + ti = r.find(it->first); + if (ti != r.end()) + max_ref_count = max(max_ref_count, ti->second); + } + counts.Add(it->second, min(it->second, max_ref_count), it->first.size()-1); + } + + return counts; +} + +/* + * per-sentence BLEU + * as in "Optimizing for Sentence-Level BLEU+1 + * Yields Short Translations" + * (Nakov et al. '12) + * + * [simply add 1 to reference length for calculation of BP] + * + */ +struct PerSentenceBleuScorer +{ + const size_t N_; + vector w_; + + PerSentenceBleuScorer(size_t n) : N_(n) + { + for (size_t i = 1; i <= N_; i++) + w_.push_back(1.0/N_); + } + + inline weight_t + BrevityPenalty(const size_t hl, const size_t rl) + { + if (hl > rl) + return 1; + + return exp(1 - (weight_t)rl/hl); + } + + inline size_t + BestMatchLength(const size_t hl, + const vector& ref_ls) + { + size_t m; + if (ref_ls.size() == 1) { + m = ref_ls.front(); + } else { + size_t i = 0, best_idx = 0; + size_t best = numeric_limits::max(); + for (auto l: ref_ls) { + size_t d = abs(hl-l); + if (d < best) { + best_idx = i; + best = d; + } + i += 1; + } + m = ref_ls[best_idx]; + } + + return m; + } + + weight_t + Score(const vector& hyp, + const vector& ref_ngs, + const vector& ref_ls) + { + size_t hl = hyp.size(), rl = 0; + if (hl == 0) return 0.; + rl = BestMatchLength(hl, ref_ls); + if (rl == 0) return 0.; + NgramCounts counts = MakeNgramCounts(hyp, ref_ngs, N_); + size_t M = N_; + vector v = w_; + if (rl < N_) { + M = rl; + for (size_t i = 0; i < M; i++) v[i] = 1/((weight_t)M); + } + weight_t sum = 0, add = 0; + for (size_t i = 0; i < M; i++) { + if (i == 0 && (counts.sum_[i] == 0 || counts.clipped_[i] == 0)) return 0.; + if (i > 0) add = 1; + sum += v[i] * log(((weight_t)counts.clipped_[i] + add) + / ((counts.sum_[i] + add))); + } + + //return BrevityPenalty(hl, rl+1) * exp(sum); + return BrevityPenalty(hl, rl) * exp(sum); + } +}; + +} // namespace + +#endif + -- cgit v1.2.3 From 7ee5ab698194ed52796161ace2cbfc4fbe711f3c Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 24 Jun 2015 11:30:57 +0200 Subject: fix --- training/dtrain/score.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'training/dtrain/score.h') diff --git a/training/dtrain/score.h b/training/dtrain/score.h index 06dbc5a4..d909dbf3 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -1,5 +1,5 @@ -#ifndef _DTRAIN_SCORE_H_ -#define _DTRAIN_SCORE_H_ +#ifndef _DTRAIN_SCORE_NET_INTERFACE_H_ +#define _DTRAIN_SCORE_NET_INTERFACE_H_ #include "dtrain.h" -- cgit v1.2.3 From 723cbf8d543ba8be2880b497518c87fc1cbf3573 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Wed, 24 Jun 2015 18:51:21 +0200 Subject: dtrain_net_interace: cleanup --- training/dtrain/Makefile.am | 2 +- training/dtrain/dtrain_net_interface.cc | 124 +++++++++++++++++++++++++++----- training/dtrain/dtrain_net_interface.h | 4 +- training/dtrain/sample_net_interface.h | 2 +- training/dtrain/score.h | 6 +- training/dtrain/score_net_interface.h | 2 +- 6 files changed, 113 insertions(+), 27 deletions(-) (limited to 'training/dtrain/score.h') diff --git a/training/dtrain/Makefile.am b/training/dtrain/Makefile.am index ef256ffe..a0b5545b 100644 --- a/training/dtrain/Makefile.am +++ b/training/dtrain/Makefile.am @@ -9,5 +9,5 @@ dtrain_net_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../.. dtrain_net_interface_SOURCES = dtrain_net_interface.cc dtrain_net_interface.h dtrain.h sample_net_interface.h score_net_interface.h update.h dtrain_net_interface_LDADD = ../../decoder/libcdec.a ../../klm/search/libksearch.a ../../mteval/libmteval.a ../../utils/libutils.a ../../klm/lm/libklm.a ../../klm/util/libklm_util.a ../../klm/util/double-conversion/libklm_util_double.a /fast_scratch/simianer/lfpe/nanomsg-0.5-beta/lib/libnanomsg.so -AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include +AM_CPPFLAGS = -W -Wall -Wno-sign-compare -I$(top_srcdir)/utils -I$(top_srcdir)/decoder -I$(top_srcdir)/mteval -I/fast_scratch/simianer/lfpe/nanomsg-0.5-beta/include -I/fast_scratch/simianer/lfpe/cppnanomsg diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc index 265bc014..e9612def 100644 --- a/training/dtrain/dtrain_net_interface.cc +++ b/training/dtrain/dtrain_net_interface.cc @@ -1,6 +1,6 @@ -#include "dtrain_net.h" -#include "sample_net.h" -#include "score.h" +#include "dtrain_net_interface.h" +#include "sample_net_interface.h" +#include "score_net_interface.h" #include "update.h" #include @@ -18,9 +18,11 @@ main(int argc, char** argv) exit(1); // something is wrong const size_t k = conf["k"].as(); const size_t N = conf["N"].as(); + weight_t eta = conf["learning_rate"].as(); const weight_t margin = conf["margin"].as(); const string master_addr = conf["addr"].as(); const string output_fn = conf["output"].as(); + const string debug_fn = conf["debug_output"].as(); // setup decoder register_feature_functions(); @@ -39,7 +41,7 @@ main(int argc, char** argv) cerr << _p4; // output configuration - cerr << "dtrain_net" << endl << "Parameters:" << endl; + cerr << "dtrain_net_interface" << endl << "Parameters:" << endl; cerr << setw(25) << "k " << k << endl; cerr << setw(25) << "N " << N << endl; cerr << setw(25) << "margin " << margin << endl; @@ -53,9 +55,18 @@ main(int argc, char** argv) string hello = "hello"; sock.send(hello.c_str(), hello.size()+1, 0); + // debug + ostringstream debug_output; + size_t i = 0; while(true) { + // debug -- + debug_output.str(string()); + debug_output.clear(); + debug_output << "{" << endl; + // -- debug + char *buf = NULL; string source; vector refs; @@ -65,29 +76,33 @@ main(int argc, char** argv) if (buf) { const string in(buf, buf+sz); nn::freemsg(buf); - cerr << "got input '" << in << "'" << endl; + cerr << "[dtrain] got input '" << in << "'" << endl; if (in == "shutdown") { // shut down - cerr << "got shutdown signal" << endl; + cerr << "[dtrain] got shutdown signal" << endl; next = false; } else { // translate vector parts; boost::algorithm::split_regex(parts, in, boost::regex(" \\|\\|\\| ")); if (parts[0] == "act:translate") { - cerr << "translating ..." << endl; + cerr << "[dtrain] translating ..." << endl; lambdas.init_vector(&decoder_weights); observer->dont_score = true; decoder.Decode(parts[1], observer); observer->dont_score = false; vector* samples = observer->GetSamples(); ostringstream os; - cerr << "1best features " << (*samples)[0].f << endl; + cerr << "[dtrain] 1best features " << (*samples)[0].f << endl; PrintWordIDVec((*samples)[0].w, os); sock.send(os.str().c_str(), os.str().size()+1, 0); - cerr << "> done translating, looping" << endl; + cerr << "[dtrain] done translating, looping again" << endl; continue; } else { // learn - cerr << "learning ..." << endl; + cerr << "[dtrain] learning ..." << endl; source = parts[0]; + // debug -- + debug_output << "\"source\":\"" << source.substr(source.find_first_of(">")+1, source.find_last_of("<")-3) << "\"," << endl; + debug_output << "\"target\":\"" << parts[1] << "\"," << endl; + // -- debug parts.erase(parts.begin()); for (auto s: parts) { vector r; @@ -110,24 +125,95 @@ main(int argc, char** argv) observer->SetReference(refs, rsz); decoder.Decode(source, observer); vector* samples = observer->GetSamples(); - cerr << "samples size " << samples->size() << endl; + + // debug -- + debug_output << "\"1best\":\""; + PrintWordIDVec((*samples)[0].w, debug_output); + debug_output << "\"," << endl; + debug_output << "\"kbest\":[" << endl; + size_t h = 0; + for (auto s: *samples) { + debug_output << "\"" << s.gold << " ||| " << s.model << " ||| " << s.rank << " ||| "; + debug_output << "EgivenFCoherent=" << s.f[FD::Convert("EgivenFCoherent")] << " "; + debug_output << "SampleCountF=" << s.f[FD::Convert("CountEF")] << " "; + debug_output << "MaxLexFgivenE=" << s.f[FD::Convert("MaxLexFgivenE")] << " "; + debug_output << "MaxLexEgivenF=" << s.f[FD::Convert("MaxLexEgivenF")] << " "; + debug_output << "IsSingletonF=" << s.f[FD::Convert("IsSingletonF")] << " "; + debug_output << "IsSingletonFE=" << s.f[FD::Convert("IsSingletonFE")] << " "; + debug_output << "Glue=:" << s.f[FD::Convert("Glue")] << " "; + debug_output << "WordPenalty=" << s.f[FD::Convert("WordPenalty")] << " "; + debug_output << "PassThrough=" << s.f[FD::Convert("PassThrough")] << " "; + debug_output << "LanguageModel=" << s.f[FD::Convert("LanguageModel_OOV")]; + debug_output << " ||| "; + PrintWordIDVec(s.w, debug_output); + h += 1; + debug_output << "\""; + if (h < samples->size()) { + debug_output << ","; + } + debug_output << endl; + } + debug_output << "]," << endl; + debug_output << "\"samples_size\":" << samples->size() << "," << endl; + debug_output << "\"weights_before\":{" << endl; + debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl; + debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl; + debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl; + debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl; + debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl; + debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl; + debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl; + debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl; + debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl; + debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl; + debug_output << "}," << endl; + // -- debug // get pairs and update SparseVector updates; - CollectUpdates(samples, updates, margin); - cerr << "updates size " << updates.size() << endl; - cerr << "lambdas before " << lambdas << endl; - lambdas.plus_eq_v_times_s(updates, 1.0); // FIXME: learning rate? - cerr << "lambdas after " << lambdas << endl; + size_t num_up = CollectUpdates(samples, updates, margin); + + // debug -- + debug_output << "\"num_up\":" << num_up << "," << endl; + debug_output << "\"updated_features\":" << updates.size() << "," << endl; + debug_output << "\"learning_rate\":" << eta << "," << endl; + debug_output << "\"best_match\":\""; + PrintWordIDVec((*samples)[0].w, debug_output); + debug_output << "\"," << endl; + debug_output << "\"best_match_score\":" << (*samples)[0].gold << "," << endl ; + // -- debug + + lambdas.plus_eq_v_times_s(updates, eta); i++; - cerr << "> done learning, looping" << endl; + // debug -- + debug_output << "\"weights_after\":{" << endl; + debug_output << "\"EgivenFCoherent\":" << lambdas[FD::Convert("EgivenFCoherent")] << "," << endl; + debug_output << "\"SampleCountF\":" << lambdas[FD::Convert("CountEF")] << "," << endl; + debug_output << "\"MaxLexFgivenE\":" << lambdas[FD::Convert("MaxLexFgivenE")] << "," << endl; + debug_output << "\"MaxLexEgivenF\":" << lambdas[FD::Convert("MaxLexEgivenF")] << "," << endl; + debug_output << "\"IsSingletonF\":" << lambdas[FD::Convert("IsSingletonF")] << "," << endl; + debug_output << "\"IsSingletonFE\":" << lambdas[FD::Convert("IsSingletonFE")] << "," << endl; + debug_output << "\"Glue\":" << lambdas[FD::Convert("Glue")] << "," << endl; + debug_output << "\"WordPenalty\":" << lambdas[FD::Convert("WordPenalty")] << "," << endl; + debug_output << "\"PassThrough\":" << lambdas[FD::Convert("PassThrough")] << "," << endl; + debug_output << "\"LanguageModel\":" << lambdas[FD::Convert("LanguageModel_OOV")] << endl; + debug_output << "}" << endl; + debug_output << "}" << endl; + // -- debug + + cerr << "[dtrain] done learning, looping again" << endl; string done = "done"; sock.send(done.c_str(), done.size()+1, 0); + + // debug -- + WriteFile f(debug_fn); + *f << debug_output.str(); + // -- debug } // input loop if (output_fn != "") { - cerr << "writing final weights to '" << output_fn << "'" << endl; + cerr << "[dtrain] writing final weights to '" << output_fn << "'" << endl; lambdas.init_vector(decoder_weights); Weights::WriteToFile(output_fn, decoder_weights, true); } @@ -135,7 +221,7 @@ main(int argc, char** argv) string shutdown = "off"; sock.send(shutdown.c_str(), shutdown.size()+1, 0); - cerr << "shutting down, goodbye" << endl; + cerr << "[dtrain] shutting down, goodbye" << endl; return 0; } diff --git a/training/dtrain/dtrain_net_interface.h b/training/dtrain/dtrain_net_interface.h index 1c724b55..2c539930 100644 --- a/training/dtrain/dtrain_net_interface.h +++ b/training/dtrain/dtrain_net_interface.h @@ -1,5 +1,5 @@ -#ifndef _DTRAIN_NET_H_ -#define _DTRAIN_NET_H_ +#ifndef _DTRAIN_NET_INTERFACE_H_ +#define _DTRAIN_NET_INTERFACE_H_ #include "dtrain.h" diff --git a/training/dtrain/sample_net_interface.h b/training/dtrain/sample_net_interface.h index 497149d9..98b10c82 100644 --- a/training/dtrain/sample_net_interface.h +++ b/training/dtrain/sample_net_interface.h @@ -3,7 +3,7 @@ #include "kbest.h" -#include "score.h" +#include "score_net_interface.h" namespace dtrain { diff --git a/training/dtrain/score.h b/training/dtrain/score.h index d909dbf3..e6e60acb 100644 --- a/training/dtrain/score.h +++ b/training/dtrain/score.h @@ -1,5 +1,5 @@ -#ifndef _DTRAIN_SCORE_NET_INTERFACE_H_ -#define _DTRAIN_SCORE_NET_INTERFACE_H_ +#ifndef _DTRAIN_SCORE_H_ +#define _DTRAIN_SCORE_H_ #include "dtrain.h" @@ -153,7 +153,7 @@ struct PerSentenceBleuScorer size_t best = numeric_limits::max(); for (auto l: ref_ls) { size_t d = abs(hl-l); - if (d < best) { + if (d < best) { best_idx = i; best = d; } diff --git a/training/dtrain/score_net_interface.h b/training/dtrain/score_net_interface.h index 6e359249..58357cf6 100644 --- a/training/dtrain/score_net_interface.h +++ b/training/dtrain/score_net_interface.h @@ -153,7 +153,7 @@ struct PerSentenceBleuScorer size_t best = numeric_limits::max(); for (auto l: ref_ls) { size_t d = abs(hl-l); - if (d < best) { + if (d < best) { best_idx = i; best = d; } -- cgit v1.2.3