-rw-r--r--  decoder/cdec_ff.cc               |  2
-rw-r--r--  decoder/ff_wordalign.cc          | 59
-rw-r--r--  decoder/ff_wordalign.h           | 21
-rw-r--r--  training/mpi_online_optimize.cc  | 19
4 files changed, 92 insertions, 9 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index c0c595a5..3240b6f2 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -54,6 +54,6 @@ void register_feature_functions() {
ff_registry.Register("CSplit_ReverseCharLM", new FFFactory<ReverseCharLMCSplitFeature>);
ff_registry.Register("Tagger_BigramIdentity", new FFFactory<Tagger_BigramIdentity>);
ff_registry.Register("LexicalPairIdentity", new FFFactory<LexicalPairIdentity>);
-
+ ff_registry.Register("LexicalTranslationTrigger", new FFFactory<LexicalTranslationTrigger>);
}
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index da86b714..b4981961 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -266,7 +266,6 @@ void MarkovJump::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
}
-// state: src word used, number of trg words generated
SourceBigram::SourceBigram(const std::string& param) :
FeatureFunction(sizeof(WordID) + sizeof(int)) {
}
@@ -405,6 +404,64 @@ void SourcePOSBigram::TraversalFeaturesImpl(const SentenceMetadata& smeta,
}
}
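+// Reads the trigger file given as the feature parameter: one line of
+// whitespace-separated trigger tokens per input sentence, indexed by sentence ID.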
+LexicalTranslationTrigger::LexicalTranslationTrigger(const std::string& param) :
+ FeatureFunction(0) {
+ if (param.empty()) {
+ cerr << "LexicalTranslationTrigger requires a parameter (file containing triggers)!\n";
+ } else {
+ ReadFile rf(param);
+ istream& in = *rf.stream();
+ string line;
+ while(in) {
+ getline(in, line);
+ if (!in) continue;
+ vector<WordID> v;
+ TD::ConvertSentence(line, &v);
+ triggers_.push_back(v);
+ }
+ }
+}
+
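+// Fires two indicator features per trigger word: T:<trigger>:<src>_<trg> for the
+// (trigger, source, target) triple and TT:<trigger>:<trg> for the (trigger, target) pair.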
+void LexicalTranslationTrigger::FireFeature(WordID trigger,
+ WordID src,
+ WordID trg,
+ SparseVector<double>* features) const {
+ int& fid = fmap_[trigger][src][trg];
+ if (!fid) {
+ ostringstream os;
+ os << "T:" << TD::Convert(trigger) << ':' << TD::Convert(src) << '_' << TD::Convert(trg);
+ fid = FD::Convert(os.str());
+ }
+ features->set_value(fid, 1.0);
+
+ int &tfid = target_fmap_[trigger][trg];
+ if (!tfid) {
+ ostringstream os;
+ os << "TT:" << TD::Convert(trigger) << ':' << TD::Convert(trg);
+ tfid = FD::Convert(os.str());
+ }
+ features->set_value(tfid, 1.0);
+}
+
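+// Only terminal (arity-0) rules fire this feature; the asserts below restrict them to
+// single-word lexical translations. Each trigger listed for the current sentence is
+// paired with the rule's source and target words.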
+void LexicalTranslationTrigger::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* /* estimated_features */,
+ void* context) const {
+ if (edge.Arity() == 0) {
+ assert(edge.rule_->EWords() == 1);
+ assert(edge.rule_->FWords() == 1);
+ WordID trg = edge.rule_->e()[0];
+ WordID src = edge.rule_->f()[0];
+ const vector<WordID>& triggers = triggers_[smeta.GetSentenceID()];
+ for (int i = 0; i < triggers.size(); ++i) {
+ FireFeature(triggers[i], src, trg, features);
+ }
+ }
+}
+
+// state: src word used, number of trg words generated
AlignerResults::AlignerResults(const std::string& param) :
cur_sent_(-1),
cur_grid_(NULL) {
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index ebbecfea..0754d70e 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -78,6 +78,7 @@ class MarkovJumpFClass : public FeatureFunction {
typedef std::map<WordID, int> Class2FID;
typedef std::map<WordID, Class2FID> Class2Class2FID;
+typedef std::map<WordID, Class2Class2FID> Class2Class2Class2FID;
class SourceBigram : public FeatureFunction {
public:
SourceBigram(const std::string& param);
@@ -118,6 +119,26 @@ class SourcePOSBigram : public FeatureFunction {
std::vector<std::vector<WordID> > pos_;
};
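+// Ties per-sentence trigger words (read from the file named by the parameter)
+// to the lexical translation decisions made by the decoder.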
+class LexicalTranslationTrigger : public FeatureFunction {
+ public:
+ LexicalTranslationTrigger(const std::string& param);
+ protected:
+ virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+ const Hypergraph::Edge& edge,
+ const std::vector<const void*>& ant_contexts,
+ SparseVector<double>* features,
+ SparseVector<double>* estimated_features,
+ void* context) const;
+ private:
+ void FireFeature(WordID trigger,
+ WordID src,
+ WordID trg,
+ SparseVector<double>* features) const;
+ mutable Class2Class2Class2FID fmap_; // trigger,src,trg
+ mutable Class2Class2FID target_fmap_; // trigger,trg
+ std::vector<std::vector<WordID> > triggers_;
+};
+
class AlignerResults : public FeatureFunction {
public:
AlignerResults(const std::string& param);
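
A minimal usage sketch, assuming cdec's usual "feature_function=<Name> <param>"
configuration syntax (the file name triggers.txt is illustrative only):

    feature_function=LexicalTranslationTrigger triggers.txt

Here triggers.txt would contain one whitespace-separated list of trigger tokens per
input sentence, in the same order as the sentence IDs the decoder assigns.
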
diff --git a/training/mpi_online_optimize.cc b/training/mpi_online_optimize.cc
index d662e8bd..509fbf15 100644
--- a/training/mpi_online_optimize.cc
+++ b/training/mpi_online_optimize.cc
@@ -4,10 +4,9 @@
#include <vector>
#include <cassert>
#include <cmath>
+#include <tr1/memory>
-#include <mpi.h>
#include <boost/mpi.hpp>
-#include <boost/shared_ptr.hpp>
#include <boost/program_options.hpp>
#include <boost/program_options/variables_map.hpp>
@@ -24,8 +23,8 @@
#include "sparse_vector.h"
#include "sampler.h"
+
using namespace std;
-using boost::shared_ptr;
namespace po = boost::program_options;
void SanityCheck(const vector<double>& w) {
@@ -57,13 +56,14 @@ void ShowLargestFeatures(const vector<double>& w) {
cerr << endl;
}
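+// Returns false if the required options are missing or --help was given, so that
+// main() can return an error code and let MPI shut down normally.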
-void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
+bool InitCommandLine(int argc, char** argv, po::variables_map* conf) {
po::options_description opts("Configuration options");
opts.add_options()
("input_weights,w",po::value<string>(),"Input feature weights file")
("training_data,t",po::value<string>(),"Training data corpus")
("decoder_config,c",po::value<string>(),"Decoder configuration file")
("output_weights,o",po::value<string>()->default_value("-"),"Output feature weights file")
+ ("maximum_iteration,i", po::value<unsigned>(), "Maximum number of iterations")
("minibatch_size_per_proc,s", po::value<unsigned>()->default_value(5), "Number of training instances evaluated per processor in each minibatch")
("freeze_feature_set,Z", "The feature set specified in the initial weights file is frozen throughout the duration of training")
("optimization_method,m", po::value<string>()->default_value("sgd"), "Optimization method (sgd)")
@@ -89,9 +89,9 @@ void InitCommandLine(int argc, char** argv, po::variables_map* conf) {
if (conf->count("help") || !conf->count("training_data") || !conf->count("decoder_config")) {
cerr << dcmdline_options << endl;
- MPI::Finalize();
- exit(1);
+ return false;
}
+ return true;
}
void ReadTrainingCorpus(const string& fname, vector<string>* c) {
@@ -220,7 +220,8 @@ int main(int argc, char** argv) {
std::tr1::shared_ptr<MT19937> rng;
po::variables_map conf;
- InitCommandLine(argc, argv, &conf);
+ if (!InitCommandLine(argc, argv, &conf))
+ return 1;
// load initial weights
Weights weights;
@@ -292,6 +293,10 @@ int main(int argc, char** argv) {
observer.Reset();
decoder.SetWeights(lambdas);
if (rank == 0) {
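+ // Optionally stop after a fixed number of iterations (--maximum_iteration / -i).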
+ if (conf.count("maximum_iteration")) {
+ if (iter == conf["maximum_iteration"].as<unsigned>())
+ converged = true;
+ }
SanityCheck(lambdas);
ShowLargestFeatures(lambdas);
string fname = "weights.cur.gz";