From ee4383b3bc67e2d8ce113fce716050dc2e1b8572 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Wed, 27 Jan 2010 12:34:28 +0000
Subject: add binary model2-like features

---
 decoder/cdec_ff.cc      |  1 +
 decoder/ff_wordalign.cc | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 decoder/ff_wordalign.h  | 19 +++++++++++++++++++
 3 files changed, 78 insertions(+)
 (limited to 'decoder')

diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 437de428..34499398 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -14,6 +14,7 @@ void register_feature_functions() {
   global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>);
   global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>);
   global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
+  global_ff_registry->Register("Model2BinaryFeatures", new FFFactory<Model2BinaryFeatures>);
   global_ff_registry->Register("MarkovJump", new FFFactory<MarkovJump>);
   global_ff_registry->Register("SourcePOSBigram", new FFFactory<SourcePOSBigram>);
   global_ff_registry->Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index c9f90541..c1b66a5e 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -12,8 +12,66 @@
 #include "tdict.h"   // Blunsom hack
 #include "filelib.h" // Blunsom hack
 
+// Upper bound (exclusive) on the sentence lengths and word positions that
+// Model2BinaryFeatures has feature ids for.
+static const size_t MAX_SENTENCE_SIZE = 100;
+
 using namespace std;
 
+// Registers one feature "M2_<i>_<j>:<k>" for every i, j, k below
+// MAX_SENTENCE_SIZE with j < i; the lookup in TraversalFeaturesImpl uses
+// i = source length, j = edge.i_, k = edge.prev_i_. Slots with j >= i keep
+// the sentinel -1. param is not used.
+Model2BinaryFeatures::Model2BinaryFeatures(const string& param) :
+    fids_(boost::extents[MAX_SENTENCE_SIZE][MAX_SENTENCE_SIZE][MAX_SENTENCE_SIZE]) {
+  for (size_t i = 0; i < MAX_SENTENCE_SIZE; ++i) {
+    for (size_t j = 0; j < MAX_SENTENCE_SIZE; ++j) {
+      for (size_t k = 0; k < MAX_SENTENCE_SIZE; ++k) {
+        int& val = fids_[i][j][k];
+        val = -1;
+        if (j < i) {
+          ostringstream os;
+          os << "M2_" << i << '_' << j << ':' << k;
+          val = FD::Convert(os.str());
+        }
+      }
+    }
+  }
+}
+
+// Sets the binary feature for (source length, edge.i_, edge.prev_i_) to 1.0.
+// Fires nothing when either position is -1 or when any of the three indices
+// falls outside the precomputed table.
+void Model2BinaryFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                                 const Hypergraph::Edge& edge,
+                                                 const vector<const void*>& ant_states,
+                                                 SparseVector<double>* features,
+                                                 SparseVector<double>* estimated_features,
+                                                 void* state) const {
+  // if the source word is either null or the generated word
+  // has no position in the reference
+  if (edge.i_ == -1 || edge.prev_i_ == -1)
+    return;
+
+  assert(smeta.GetTargetLength() > 0);
+
+  // fids_ only covers indices in [0, MAX_SENTENCE_SIZE); anything outside it
+  // is not a valid access, so out-of-range sentences fire no feature.
+  const int limit = static_cast<int>(MAX_SENTENCE_SIZE);
+  const int src_len = smeta.GetSourceLength();
+  if (src_len < 0 || src_len >= limit ||
+      edge.i_ < 0 || edge.i_ >= limit ||
+      edge.prev_i_ < 0 || edge.prev_i_ >= limit)
+    return;
+
+  // -1 marks a slot that was never registered (edge.i_ >= src_len)
+  const int fid = fids_[src_len][edge.i_][edge.prev_i_];
+  if (fid < 0)
+    return;
+  features->set_value(fid, 1.0);
+}
+
+
 RelativeSentencePosition::RelativeSentencePosition(const string& param) :
     fid_(FD::Convert("RelativeSentencePosition")) {
   if (!param.empty()) {
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index aea4c950..582e8c9f 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -4,6 +4,8 @@
 #include "ff.h"
 #include "array2d.h"
 
+#include <boost/multi_array.hpp>
+
 class RelativeSentencePosition : public FeatureFunction {
  public:
   RelativeSentencePosition(const std::string& param);
@@ -20,6 +22,23 @@ class RelativeSentencePosition : public FeatureFunction {
   std::string template_;
 };
 
+// Binary feature function: fires one pre-registered feature selected by
+// (source sentence length, edge.i_, edge.prev_i_).
+class Model2BinaryFeatures : public FeatureFunction {
+ public:
+  Model2BinaryFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* out_context) const;
+ private:
+  // feature ids indexed [source length][edge.i_][edge.prev_i_]; -1 = no feature
+  boost::multi_array<int, 3> fids_;
+};
+
 class MarkovJump : public FeatureFunction {
  public:
   MarkovJump(const std::string& param);
-- 
cgit v1.2.3