From f412aaab3d10fb82b20a2190f2cb1424959c599a Mon Sep 17 00:00:00 2001
From: redpony
Date: Wed, 29 Sep 2010 20:45:48 +0000
Subject: another feature, another POS

Review note: relative to the original revision, SourceBigram::FireFeature now
caches a failed unigram lookup in ufid (it previously overwrote fid), and
SourceBigram::TraversalFeaturesImpl reads right_wc from ant_contexts[1]
(it previously re-read ant_contexts[0]). Explanatory comments were added.

git-svn-id: https://ws10smt.googlecode.com/svn/trunk@664 ec762483-ff6d-05da-a07a-a48fb63a330f
---
 decoder/cdec_ff.cc              |  1 +
 decoder/ff.cc                   |  4 ++-
 decoder/ff_wordalign.cc         | 75 +++++++++++++++++++++++++++++++++++++++++
 decoder/ff_wordalign.h          | 21 +++++++++++++
 training/mpi_online_optimize.cc | 18 ++++++-----
 5 files changed, 111 insertions(+), 8 deletions(-)

diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 84ba19fa..c0c595a5 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -46,6 +46,7 @@ void register_feature_functions() {
   ff_registry.Register("Model2BinaryFeatures", new FFFactory<Model2BinaryFeatures>);
   ff_registry.Register("MarkovJump", new FFFactory<MarkovJump>);
   ff_registry.Register("MarkovJumpFClass", new FFFactory<MarkovJumpFClass>);
+  ff_registry.Register("SourceBigram", new FFFactory<SourceBigram>);
   ff_registry.Register("SourcePOSBigram", new FFFactory<SourcePOSBigram>);
   ff_registry.Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
   ff_registry.Register("AlignerResults", new FFFactory<AlignerResults>);
diff --git a/decoder/ff.cc b/decoder/ff.cc
index 7bdd21e3..a32c0dcb 100644
--- a/decoder/ff.cc
+++ b/decoder/ff.cc
@@ -171,7 +171,9 @@ void ModelSet::AddFeaturesToEdge(const SentenceMetadata& smeta,
                                  prob_t* combination_cost_estimate) const {
   edge->reset_info();
   context->resize(state_size_);
-  memset(&(*context)[0], 0, state_size_);
+  if (state_size_ > 0) {
+    memset(&(*context)[0], 0, state_size_);
+  }
   SparseVector<double> est_vals;  // only computed if combination_cost_estimate is non-NULL
   if (combination_cost_estimate) *combination_cost_estimate = prob_t::One();
   for (int i = 0; i < models_.size(); ++i) {
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index a1968159..da86b714 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -266,6 +266,81 @@ void MarkovJump::TraversalFeaturesImpl(const SentenceMetadata& smeta,
   }
 }
 
+// state: src word used, number of trg words generated
+SourceBigram::SourceBigram(const std::string& param) :
+    FeatureFunction(sizeof(WordID) + sizeof(int)) {
+}
+
+// Boundary features fired from the final state: (BOS, word) when the state's
+// word count is 1, then always (word, EOS). A negative id means "boundary".
+void SourceBigram::FinalTraversalFeatures(const void* context,
+                                          SparseVector<double>* features) const {
+  WordID left = *static_cast<const WordID*>(context);
+  int left_wc = *(static_cast<const int*>(context) + 1);
+  if (left_wc == 1)
+    FireFeature(-1, left, features);
+  FireFeature(left, -1, features);
+}
+
+// Sets the bigram feature "SB:<left>_<right>" and the unigram feature
+// "SU:<left>" to 1.0. A negative id prints as BOS (left) or EOS (right).
+// Feature ids are cached in fmap_ / ufmap_; an FD::Convert() result of 0 is
+// cached as -1 so that the feature is skipped without repeating the lookup.
+void SourceBigram::FireFeature(WordID left,
+                               WordID right,
+                               SparseVector<double>* features) const {
+  int& fid = fmap_[left][right];
+  // TODO important important !!! escape strings !!!
+  if (!fid) {
+    ostringstream os;
+    os << "SB:";
+    if (left < 0) { os << "BOS"; } else { os << TD::Convert(left); }
+    os << '_';
+    if (right < 0) { os << "EOS"; } else { os << TD::Convert(right); }
+    fid = FD::Convert(os.str());
+    if (fid == 0) fid = -1;
+  }
+  if (fid > 0) features->set_value(fid, 1.0);
+  int& ufid = ufmap_[left];
+  if (!ufid) {
+    ostringstream os;
+    os << "SU:";
+    if (left < 0) { os << "BOS"; } else { os << TD::Convert(left); }
+    ufid = FD::Convert(os.str());
+    if (ufid == 0) ufid = -1;
+  }
+  if (ufid > 0) features->set_value(ufid, 1.0);
+}
+
+// Arity-0 edges store the rule's first source word and its target word count
+// in the state. Arity-2 edges fire the bigram over the two antecedents' words
+// and pass on the right-hand word together with the summed word count.
+void SourceBigram::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                         const Hypergraph::Edge& edge,
+                                         const std::vector<const void*>& ant_contexts,
+                                         SparseVector<double>* features,
+                                         SparseVector<double>* /* estimated_features */,
+                                         void* context) const {
+  WordID& out_context = *static_cast<WordID*>(context);
+  int& out_word_count = *(static_cast<int*>(context) + 1);
+  const int arity = edge.Arity();
+  if (arity == 0) {
+    out_context = edge.rule_->f()[0];
+    out_word_count = edge.rule_->EWords();
+    assert(out_word_count == 1); // this is only defined for lex translation!
+    // revisit this if you want to translate into null words
+  } else if (arity == 2) {
+    WordID left = *static_cast<const WordID*>(ant_contexts[0]);
+    WordID right = *static_cast<const WordID*>(ant_contexts[1]);
+    int left_wc = *(static_cast<const int*>(ant_contexts[0]) + 1);
+    int right_wc = *(static_cast<const int*>(ant_contexts[1]) + 1);
+    if (left_wc == 1 && right_wc == 1)
+      FireFeature(-1, left, features);
+    FireFeature(left, right, features);
+    out_word_count = left_wc + right_wc;
+    out_context = right;
+  }
+}
 // state: POS of src word used, number of trg words generated
 SourcePOSBigram::SourcePOSBigram(const std::string& param) :
     FeatureFunction(sizeof(WordID) + sizeof(int)) {
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index c44ad26b..ebbecfea 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -78,6 +78,27 @@ class MarkovJumpFClass : public FeatureFunction {
 typedef std::map<WordID, int> Class2FID;
 typedef std::map<WordID, Class2FID> Class2Class2FID;
 
+// Fires source-word bigram ("SB:") and unigram ("SU:") indicator features.
+class SourceBigram : public FeatureFunction {
+ public:
+  SourceBigram(const std::string& param);
+  virtual void FinalTraversalFeatures(const void* context,
+                                      SparseVector<double>* features) const;
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+ private:
+  void FireFeature(WordID src,
+                   WordID trg,
+                   SparseVector<double>* features) const;
+  mutable Class2Class2FID fmap_;
+  mutable Class2FID ufmap_;
+};
+
 class SourcePOSBigram : public FeatureFunction {
  public:
   SourcePOSBigram(const std::string& param);
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index 0c032c01..d662e8bd 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -215,10 +215,9 @@ int main(int argc, char** argv) {
   mpi::communicator world;
   const int size = world.size();
   const int rank = world.rank();
-  SetSilent(true);  // turn off verbose decoder output
-  cerr << "MPI: I am " << rank << '/' << size << endl;
+  if (size > 1) SetSilent(true);  // turn off verbose decoder output
   register_feature_functions();
-  MT19937* rng = NULL;
+  std::tr1::shared_ptr<MT19937> rng;
 
   po::variables_map conf;
   InitCommandLine(argc, argv, &conf);
@@ -272,9 +271,9 @@ int main(int argc, char** argv) {
     for (unsigned i = 0; i < order.size(); ++i) order[i]=i;
     // randomize corpus
     if (conf.count("random_seed"))
-      rng = new MT19937(conf["random_seed"].as<uint32_t>());
+      rng.reset(new MT19937(conf["random_seed"].as<uint32_t>()));
     else
-      rng = new MT19937;
+      rng.reset(new MT19937);
   }
   SparseVector<double> x;
   weights.InitSparseVector(&x);
@@ -283,6 +282,7 @@ int main(int argc, char** argv) {
   double objective = 0;
   bool converged = false;
 
+  int write_weights_every_ith = 100; // TODO configure
   int iter = -1;
   vector<double> lambdas;
   while (!converged) {
@@ -296,6 +296,10 @@ int main(int argc, char** argv) {
       ShowLargestFeatures(lambdas);
       string fname = "weights.cur.gz";
       if (converged) { fname = "weights.final.gz"; }
+      if (iter % write_weights_every_ith == 0) {
+        ostringstream o; o << "weights." << iter << ".gz";
+        fname = o.str();
+      }
       ostringstream vv;
       vv << "Objective = " << objective; // << " (eval count=" << o->EvaluationCount() << ")";
       const string svv = vv.str();
@@ -304,12 +308,12 @@ int main(int argc, char** argv) {
 
     if (fully_random || size * size_per_proc * miter > corpus.size()) {
       if (rank == 0)
-        Shuffle(&order, rng);
+        Shuffle(&order, rng.get());
       miter = 0;
       broadcast(world, order, 0);
     }
     if (rank == 0)
-      cerr << "iter=" << iter << " minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << " passes_thru_data=" << (iter * batch_size / static_cast<double>(corpus.size())) << " eta=" << lr->eta(iter) << endl;
+      cerr << "iter=" << iter << " minibatch=" << size_per_proc << " sentences/proc x " << size << " procs. num_feats=" << x.size() << '/' << FD::NumFeats() << " passes_thru_data=" << (iter * batch_size / static_cast<double>(corpus.size())) << " eta=" << lr->eta(iter) << endl;
 
     const int beg = size * miter * size_per_proc + rank * size_per_proc;
     const int end = beg + size_per_proc;
-- 
cgit v1.2.3