diff options
-rw-r--r-- | decoder/cdec_ff.cc | 2 | ||||
-rw-r--r-- | decoder/ff_wordalign.cc | 59 | ||||
-rw-r--r-- | decoder/ff_wordalign.h | 21 | ||||
-rw-r--r-- | training/mpi_online_optimize.cc | 19 |
4 files changed, 92 insertions, 9 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc index c0c595a5..3240b6f2 100644 --- a/decoder/cdec_ff.cc +++ b/decoder/cdec_ff.cc @@ -54,6 +54,6 @@ void register_feature_functions() { ff_registry.Register("CSplit_ReverseCharLM", new FFFactory<ReverseCharLMCSplitFeature>); ff_registry.Register("Tagger_BigramIdentity", new FFFactory<Tagger_BigramIdentity>); ff_registry.Register("LexicalPairIdentity", new FFFactory<LexicalPairIdentity>); - + ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>); } diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc index da86b714..b4981961 100644 --- a/decoder/ff_wordalign.cc +++ b/decoder/ff_wordalign.cc @@ -266,7 +266,6 @@ void MarkovJump::TraversalFeaturesImpl(const SentenceMetadata& smeta, } } -// state: src word used, number of trg words generated SourceBigram::SourceBigram(const std::string& param) : FeatureFunction(sizeof(WordID) + sizeof(int)) { } @@ -405,6 +404,64 @@ void SourcePOSBigram::TraversalFeaturesImpl(const SentenceMetadata& smeta, } } +LexicalTranslationTrigger::LexicalTranslationTrigger(const std::string& param) : + FeatureFunction(0) { + if (param.empty()) { + cerr << "LexicalTranslationTrigger requires a parameter (file containing triggers)!\n"; + } else { + ReadFile rf(param); + istream& in = *rf.stream(); + string line; + while(in) { + getline(in, line); + if (!in) continue; + vector<WordID> v; + TD::ConvertSentence(line, &v); + triggers_.push_back(v); + } + } +} + +void LexicalTranslationTrigger::FireFeature(WordID trigger, + WordID src, + WordID trg, + SparseVector<double>* features) const { + int& fid = fmap_[trigger][src][trg]; + if (!fid) { + ostringstream os; + os << "T:" << TD::Convert(trigger) << ':' << TD::Convert(src) << '_' << TD::Convert(trg); + fid = FD::Convert(os.str()); + } + features->set_value(fid, 1.0); + + int &tfid = target_fmap_[trigger][trg]; + if (!tfid) { + ostringstream os; + os << "TT:" << TD::Convert(trigger) << ':' << 
TD::Convert(trg); + tfid = FD::Convert(os.str()); + } + features->set_value(tfid, 1.0); +} + +void LexicalTranslationTrigger::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* /* estimated_features */, + void* context) const { + if (edge.Arity() == 0) { + assert(edge.rule_->EWords() == 1); + assert(edge.rule_->FWords() == 1); + WordID trg = edge.rule_->e()[0]; + WordID src = edge.rule_->f()[0]; + const vector<WordID>& triggers = triggers_[smeta.GetSentenceID()]; + for (int i = 0; i < triggers.size(); ++i) { + FireFeature(triggers[i], src, trg, features); + } + } +} + +// state: src word used, number of trg words generated AlignerResults::AlignerResults(const std::string& param) : cur_sent_(-1), cur_grid_(NULL) { diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h index ebbecfea..0754d70e 100644 --- a/decoder/ff_wordalign.h +++ b/decoder/ff_wordalign.h @@ -78,6 +78,7 @@ class MarkovJumpFClass : public FeatureFunction { typedef std::map<WordID, int> Class2FID; typedef std::map<WordID, Class2FID> Class2Class2FID; +typedef std::map<WordID, Class2Class2FID> Class2Class2Class2FID; class SourceBigram : public FeatureFunction { public: SourceBigram(const std::string& param); @@ -118,6 +119,26 @@ class SourcePOSBigram : public FeatureFunction { std::vector<std::vector<WordID> > pos_; }; +class LexicalTranslationTrigger : public FeatureFunction { + public: + LexicalTranslationTrigger(const std::string& param); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector<const void*>& ant_contexts, + SparseVector<double>* features, + SparseVector<double>* estimated_features, + void* context) const; + private: + void FireFeature(WordID trigger, + WordID src, + WordID trg, + SparseVector<double>* features) const; + mutable Class2Class2Class2FID fmap_; // 
trigger,src,trg + mutable Class2Class2FID target_fmap_; // trigger,trg + std::vector<std::vector<WordID> > triggers_; +}; + class AlignerResults : public FeatureFunction { public: AlignerResults(const std::string& param); diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc index d662e8bd..509fbf15 100644 --- a/training/mpi_online_optimize.cc +++ b/training/mpi_online_optimize.cc @@ -4,10 +4,9 @@ #include <vector> #include <cassert> #include <cmath> +#include <tr1/memory> -#include <mpi.h> #include <boost/mpi.hpp> -#include <boost/shared_ptr.hpp> #include <boost/program_options.hpp> #include <boost/program_options/variables_map.hpp> @@ -24,8 +23,8 @@ #include "sparse_vector.h" #include "sampler.h" + using namespace std; -using boost::shared_ptr; namespace po = boost::program_options; void SanityCheck(const vector<double>& w) { @@ -57,13 +56,14 @@ void ShowLargestFeatures(const vector<double>& w) { cerr << endl; } -void InitCommandLine(int argc, char** argv, po::variables_map* conf) { +bool InitCommandLine(int argc, char** argv, po::variables_map* conf) { po::options_description opts("Configuration options"); opts.add_options() ("input_weights,w",po::value<string>(),"Input feature weights file") ("training_data,t",po::value<string>(),"Training data corpus") ("decoder_config,c",po::value<string>(),"Decoder configuration file") ("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file") + ("maximum_iteration,i", po::value<unsigned>(), "Maximum number of iterations") ("minibatch_size_per_proc,s", po::value<unsigned>()->default_value(5), "Number of training instances evaluated per processor in each minibatch") ("freeze_feature_set,Z", "The feature set specified in the initial weights file is frozen throughout the duration of training") ("optimization_method,m", po::value<string>()->default_value("sgd"), "Optimization method (sgd)") @@ -89,9 +89,9 @@ void InitCommandLine(int argc, char** argv, 
po::variables_map* conf) { if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) { cerr << dcmdline_options << endl; - MPI::Finalize(); - exit(1); + return false; } + return true; } void ReadTrainingCorpus(const string& fname, vector<string>* c) { @@ -220,7 +220,8 @@ int main(int argc, char** argv) { std::tr1::shared_ptr<MT19937> rng; po::variables_map conf; - InitCommandLine(argc, argv, &conf); + if (!InitCommandLine(argc, argv, &conf)) + return 1; // load initial weights Weights weights; @@ -292,6 +293,10 @@ int main(int argc, char** argv) { observer.Reset(); decoder.SetWeights(lambdas); if (rank == 0) { + if (conf.count("maximum_iteration")) { + if (iter == conf["maximum_iteration"].as<unsigned>()) + converged = true; + } SanityCheck(lambdas); ShowLargestFeatures(lambdas); string fname = "weights.cur.gz"; |