diff options
author | Chris Dyer <redpony@gmail.com> | 2014-11-24 00:16:18 -0500 |
---|---|---|
committer | Chris Dyer <redpony@gmail.com> | 2014-11-24 00:16:18 -0500 |
commit | 41e89b46b19fedfae33637383dd3f055d0f356b5 (patch) | |
tree | 8738aed2092c02c1ae3adf9c5dc5828b67ea8df7 /decoder/decoder.cc | |
parent | af1cea51086ce9188904235635a3a7288304fb35 (diff) | |
parent | 8c0e4a5c1f168a419b3a236a94815c97164bddbc (diff) |
Merge pull request #59 from kho/mrmira
Mr.MIRA compatibility
Diffstat (limited to 'decoder/decoder.cc')
-rw-r--r-- | decoder/decoder.cc | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/decoder/decoder.cc b/decoder/decoder.cc index 737201e7..1e6c3194 100644 --- a/decoder/decoder.cc +++ b/decoder/decoder.cc @@ -17,6 +17,7 @@ namespace std { using std::tr1::unordered_map; } #include "fdict.h" #include "timing_stats.h" #include "verbose.h" +#include "b64featvector.h" #include "translator.h" #include "phrasebased_translator.h" @@ -301,6 +302,7 @@ struct DecoderImpl { bool feature_expectations; // TODO Observer bool output_training_vector; // TODO Observer bool remove_intersected_rule_annotations; + bool mr_mira_compat; // Mr.MIRA compatibility mode. boost::scoped_ptr<IncrementalBase> incremental; @@ -415,7 +417,8 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream ("vector_format",po::value<string>()->default_value("b64"), "Sparse vector serialization format for feature expectations or gradients, includes (text or b64)") ("combine_size,C",po::value<int>()->default_value(1), "When option -G is used, process this many sentence pairs before writing the gradient (1=emit after every sentence pair)") ("forest_output,O",po::value<string>(),"Directory to write forests to") - ("remove_intersected_rule_annotations", "After forced decoding is completed, remove nonterminal annotations (i.e., the source side spans)"); + ("remove_intersected_rule_annotations", "After forced decoding is completed, remove nonterminal annotations (i.e., the source side spans)") + ("mr_mira_compat", "Mr.MIRA compatibility mode (applies weight delta if available; outputs number of lines before k-best)"); // ob.AddOptions(&opts); po::options_description clo("Command line options"); @@ -668,6 +671,7 @@ DecoderImpl::DecoderImpl(po::variables_map& conf, int argc, char** argv, istream oracle.show_derivation=conf.count("show_derivations"); oracle.show_derivation_mask=conf["show_derivations_mask"].as<int>(); remove_intersected_rule_annotations = conf.count("remove_intersected_rule_annotations"); + mr_mira_compat = conf.count("mr_mira_compat"); combine_size = conf["combine_size"].as<int>(); if (combine_size < 1) combine_size = 1; @@ -701,6 +705,24 @@ void Decoder::AddSupplementalGrammarFromString(const std::string& grammar_string static_cast<SCFGTranslator&>(*pimpl_->translator).AddSupplementalGrammarFromString(grammar_string); } +static inline void ApplyWeightDelta(const string &delta_b64, vector<weight_t> *weights) { + SparseVector<weight_t> delta; + DecodeFeatureVector(delta_b64, &delta); + if (delta.empty()) return; + // Apply updates + for (SparseVector<weight_t>::iterator dit = delta.begin(); + dit != delta.end(); ++dit) { + int feat_id = dit->first; + union { weight_t weight; unsigned long long repr; } feat_delta; + feat_delta.weight = dit->second; + if (!SILENT) + cerr << "[decoder weight update] " << FD::Convert(feat_id) << " " << feat_delta.weight + << " = " << hex << feat_delta.repr << endl; + if (weights->size() <= feat_id) weights->resize(feat_id + 1); + (*weights)[feat_id] += feat_delta.weight; + } +} + bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { string buf = input; NgramCache::Clear(); // clear ngram cache for remote LM (if used) @@ -711,6 +733,10 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (sgml.find("id") != sgml.end()) sent_id = atoi(sgml["id"].c_str()); + // Add delta from input to weights before decoding + if (mr_mira_compat) + ApplyWeightDelta(sgml["delta"], init_weights.get()); + if (!SILENT) { cerr << "\nINPUT: "; if (buf.size() < 100) @@ -949,7 +975,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (kbest && !has_ref) { //TODO: does this work properly? const string deriv_fname = conf.count("show_derivations") ? str("show_derivations",conf) : "-"; - oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-", deriv_fname); + oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,mr_mira_compat, smeta.GetSourceLength(), "-", deriv_fname); } else if (csplit_output_plf) { cout << HypergraphIO::AsPLF(forest, false) << endl; } else { @@ -1080,7 +1106,7 @@ bool DecoderImpl::Decode(const string& input, DecoderObserver* o) { if (conf.count("graphviz")) forest.PrintGraphviz(); if (kbest) { const string deriv_fname = conf.count("show_derivations") ? str("show_derivations",conf) : "-"; - oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest,"-", deriv_fname); + oracle.DumpKBest(sent_id, forest, conf["k_best"].as<int>(), unique_kbest, mr_mira_compat, smeta.GetSourceLength(), "-", deriv_fname); } if (conf.count("show_conditional_prob")) { const prob_t ref_z = Inside<prob_t, EdgeProb>(forest); |