From 1a749f62c19ea77b74a61a5ec747c16fea95f860 Mon Sep 17 00:00:00 2001 From: Patrick Simianer Date: Thu, 14 Jul 2016 21:44:24 +0200 Subject: translate _and_ learn --- training/dtrain/dtrain_net_interface.cc | 98 +++++++++++++++++++++------------ 1 file changed, 63 insertions(+), 35 deletions(-) (limited to 'training') diff --git a/training/dtrain/dtrain_net_interface.cc b/training/dtrain/dtrain_net_interface.cc index 761930f8..ac447517 100644 --- a/training/dtrain/dtrain_net_interface.cc +++ b/training/dtrain/dtrain_net_interface.cc @@ -98,6 +98,8 @@ main(int argc, char** argv) debug_output << "{" << endl; // hack us a nice JSON output // -- debug + bool just_translate = false; + char *buf = NULL; string source; vector refs; @@ -210,36 +212,41 @@ main(int argc, char** argv) } else { // translate vector parts; boost::algorithm::split_regex(parts, in, boost::regex(" \\|\\|\\| ")); - if (parts[0] == "act:translate") { + if (parts[0] == "act:translate" || parts[0] == "act:translate_learn") { + if (parts[0] == "act:translate") + just_translate = true; cerr << "[dtrain] translating ..." << endl; lambdas.init_vector(&decoder_weights); observer->dont_score = true; decoder.Decode(parts[1], observer); observer->dont_score = false; samples = observer->GetSamples(); - ostringstream os; - cerr << "[dtrain] 1best features " << (*samples)[0].f << endl; - if (output_derivation) { - os << observer->GetViterbiTreeStr() << endl; - } else { - PrintWordIDVec((*samples)[0].w, os); - } - if (output_rules) { - os << observer->GetViterbiRules() << endl; + if (parts[0] == "act:translate") { + ostringstream os; + cerr << "[dtrain] 1best features " << (*samples)[0].f << endl; + if (output_derivation) { + os << observer->GetViterbiTreeStr() << endl; + } else { + PrintWordIDVec((*samples)[0].w, os); + } + if (output_rules) { + os << observer->GetViterbiRules() << endl; + } + sock.send(os.str().c_str(), os.str().size()+1, 0); + cerr << "[dtrain] done translating, looping again" << endl; } - sock.send(os.str().c_str(), os.str().size()+1, 0); - cerr << "[dtrain] done translating, looping again" << endl; - continue; - } else { // learn + } //else { // learn + if (!just_translate) { cerr << "[dtrain] learning ..." << endl; - source = parts[0]; + source = parts[1]; // debug -- debug_output << "\"source\":\"" - << source.substr(source.find_first_of(">")+2, source.find_last_of(">")-6) + << escapeJson(source.substr(source.find_first_of(">")+2, source.find_last_of(">")-6)) << "\"," << endl; - debug_output << "\"target\":\"" << parts[1] << "\"," << endl; + debug_output << "\"target\":\"" << escapeJson(parts[2]) << "\"," << endl; // -- debug parts.erase(parts.begin()); + parts.erase(parts.begin()); for (auto s: parts) { vector r; vector toks; @@ -252,6 +259,8 @@ main(int argc, char** argv) for (size_t r = 0; r < samples->size(); r++) (*samples)[r].gold = observer->scorer_->Score((*samples)[r].w, refs, rsz); + //} + //} } } } @@ -262,9 +271,10 @@ main(int argc, char** argv) // decode lambdas.init_vector(&decoder_weights); - // debug -- - debug_output << "\"1best\":\""; - PrintWordIDVec((*samples)[0].w, debug_output); + // debug --) + ostringstream os; + PrintWordIDVec((*samples)[0].w, os); + debug_output << "\"1best\":\"" << escapeJson(os.str()); debug_output << "\"," << endl; debug_output << "\"kbest\":[" << endl; size_t h = 0; @@ -272,9 +282,11 @@ main(int argc, char** argv) debug_output << "\"" << s.gold << " ||| " << s.model << " ||| " << s.rank << " ||| "; for (auto o: s.f) - debug_output << FD::Convert(o.first) << "=" << o.second << " "; + debug_output << escapeJson(FD::Convert(o.first)) << "=" << o.second << " "; debug_output << " ||| "; - PrintWordIDVec(s.w, debug_output); + ostringstream os; + PrintWordIDVec(s.w, os); + debug_output << escapeJson(os.str()); h += 1; debug_output << "\""; if (h < samples->size()) { @@ -296,8 +308,12 @@ main(int argc, char** argv) size_t num_up = CollectUpdates(samples, update, margin); // debug -- - debug_output << "\"1best_features\":\"" << (*samples)[0].f << "\"," << endl; - debug_output << "\"update_raw\":\"" << update << "\"," << endl; + debug_output << "\"1best_features\":{"; + sparseVectorToJson((*samples)[0].f, debug_output); + debug_output << "}," << endl; + debug_output << "\"update_raw\":{"; + sparseVectorToJson(update, debug_output); + debug_output << "}," << endl; // -- debug // update @@ -318,11 +334,16 @@ main(int argc, char** argv) } } } - lambdas += update; - i++; + if (!just_translate) { + lambdas += update; + } else { + i++; + } // debug -- - debug_output << "\"update\":\"" << update << "\"," << endl; + debug_output << "\"update\":{"; + sparseVectorToJson(update, debug_output); + debug_output << "}," << endl; debug_output << "\"num_up\":" << num_up << "," << endl; debug_output << "\"updated_features\":" << update.size() << "," << endl; debug_output << "\"learning_rate_R\":" << learning_rate_R << "," << endl; @@ -332,7 +353,9 @@ main(int argc, char** argv) sparseVectorToJson(learning_rates, debug_output); debug_output << "}," << endl; debug_output << "\"best_match\":\""; - PrintWordIDVec((*samples)[0].w, debug_output); + ostringstream ps; + PrintWordIDVec((*samples)[0].w, ps); + debug_output << escapeJson(ps.str()); debug_output << "\"," << endl; debug_output << "\"best_match_score\":" << (*samples)[0].gold << "," << endl ; // -- debug @@ -344,9 +367,6 @@ main(int argc, char** argv) debug_output << "}" << endl; // -- debug - cerr << "[dtrain] done learning, looping again" << endl; - sock.send(done.c_str(), done.size()+1, 0); - // debug -- WriteFile f(debug_fn); f.get() << debug_output.str(); @@ -354,10 +374,18 @@ main(int argc, char** argv) // -- debug // write current weights - lambdas.init_vector(decoder_weights); - ostringstream fn; - fn << output_fn << "." << i << ".gz"; - Weights::WriteToFile(fn.str(), decoder_weights, true); + if (!just_translate) { + lambdas.init_vector(decoder_weights); + ostringstream fn; + fn << output_fn << "." << i << ".gz"; + Weights::WriteToFile(fn.str(), decoder_weights, true); + } + + if (!just_translate) { + cerr << "[dtrain] done learning, looping again" << endl; + sock.send(done.c_str(), done.size()+1, 0); + } + } // input loop string shutdown = "off"; -- cgit v1.2.3