diff options
-rw-r--r-- | decoder/cdec_ff.cc | 1 | ||||
-rw-r--r-- | decoder/ff_wordalign.cc | 55 | ||||
-rw-r--r-- | decoder/ff_wordalign.h | 19 |
3 files changed, 75 insertions, 0 deletions
diff --git a/decoder/cdec_ff.cc b/decoder/cdec_ff.cc
index 437de428..34499398 100644
--- a/decoder/cdec_ff.cc
+++ b/decoder/cdec_ff.cc
@@ -14,6 +14,7 @@ void register_feature_functions() {
   global_ff_registry->Register("WordPenalty", new FFFactory<WordPenalty>);
   global_ff_registry->Register("SourceWordPenalty", new FFFactory<SourceWordPenalty>);
   global_ff_registry->Register("RelativeSentencePosition", new FFFactory<RelativeSentencePosition>);
+  global_ff_registry->Register("Model2BinaryFeatures", new FFFactory<Model2BinaryFeatures>);
   global_ff_registry->Register("MarkovJump", new FFFactory<MarkovJump>);
   global_ff_registry->Register("SourcePOSBigram", new FFFactory<SourcePOSBigram>);
   global_ff_registry->Register("BlunsomSynchronousParseHack", new FFFactory<BlunsomSynchronousParseHack>);
diff --git a/decoder/ff_wordalign.cc b/decoder/ff_wordalign.cc
index c9f90541..c1b66a5e 100644
--- a/decoder/ff_wordalign.cc
+++ b/decoder/ff_wordalign.cc
@@ -12,8 +12,63 @@
 #include "tdict.h" // Blunsom hack
 #include "filelib.h" // Blunsom hack
 
+// Exclusive upper bound on each of the three indices of Model2BinaryFeatures::fids_.
+static const size_t MAX_SENTENCE_SIZE = 100;
+
 using namespace std;
 
+// Builds the feature-id table: fids_[i][j][k] is the id of the feature named
+// "M2_<i>_<j>:<k>" when j < i, and -1 otherwise. All ids are created up front
+// through FD::Convert. The param argument is not used.
+Model2BinaryFeatures::Model2BinaryFeatures(const string& param) :
+    fids_(boost::extents[MAX_SENTENCE_SIZE][MAX_SENTENCE_SIZE][MAX_SENTENCE_SIZE]) {
+  for (size_t i = 0; i < MAX_SENTENCE_SIZE; ++i) {
+    for (size_t j = 0; j < MAX_SENTENCE_SIZE; ++j) {
+      for (size_t k = 0; k < MAX_SENTENCE_SIZE; ++k) {
+        int& val = fids_[i][j][k];
+        val = -1;
+        if (j < i) {
+          ostringstream os;
+          os << "M2_" << i << '_' << j << ':' << k;
+          val = FD::Convert(os.str());
+        }
+      }
+    }
+  }
+}
+
+// Fires one binary feature (value 1.0) looked up by smeta.GetSourceLength(),
+// edge.i_ and edge.prev_i_. Emits nothing when either edge index is -1 or when
+// any of the three values falls outside the precomputed table.
+void Model2BinaryFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                                 const Hypergraph::Edge& edge,
+                                                 const vector<const void*>& ant_states,
+                                                 SparseVector<double>* features,
+                                                 SparseVector<double>* estimated_features,
+                                                 void* state) const {
+  // if the source word is either null or the generated word
+  // has no position in the reference
+  if (edge.i_ == -1 || edge.prev_i_ == -1)
+    return;
+
+  assert(smeta.GetTargetLength() > 0);
+  const int f_len = smeta.GetSourceLength();
+  // fids_ only covers indices in [0, MAX_SENTENCE_SIZE); indexing it with a longer
+  // sentence or an unexpected negative position would read out of bounds.
+  const int limit = static_cast<int>(MAX_SENTENCE_SIZE);
+  if (f_len < 0 || f_len >= limit ||
+      edge.i_ < 0 || edge.i_ >= limit ||
+      edge.prev_i_ < 0 || edge.prev_i_ >= limit)
+    return;
+
+  const int fid = fids_[f_len][edge.i_][edge.prev_i_];
+  // Entries with j >= i were never assigned a feature and still hold -1.
+  if (fid < 0)
+    return;
+  features->set_value(fid, 1.0);
+}
+
+
 RelativeSentencePosition::RelativeSentencePosition(const string& param) :
     fid_(FD::Convert("RelativeSentencePosition")) {
   if (!param.empty()) {
diff --git a/decoder/ff_wordalign.h b/decoder/ff_wordalign.h
index aea4c950..582e8c9f 100644
--- a/decoder/ff_wordalign.h
+++ b/decoder/ff_wordalign.h
@@ -4,6 +4,8 @@
 #include "ff.h"
 #include "array2d.h"
 
+#include <boost/multi_array.hpp>
+
 class RelativeSentencePosition : public FeatureFunction {
  public:
   RelativeSentencePosition(const std::string& param);
@@ -20,6 +22,23 @@ class RelativeSentencePosition : public FeatureFunction {
   std::string template_;
 };
 
+// Binary feature keyed on (source sentence length, edge.i_, edge.prev_i_);
+// one feature id per key is precomputed by the constructor.
+class Model2BinaryFeatures : public FeatureFunction {
+ public:
+  Model2BinaryFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* out_context) const;
+ private:
+  // fids_[source_len][i][prev_i] -> feature id, or -1 where no feature exists.
+  boost::multi_array<int, 3> fids_;
+};
+
 class MarkovJump : public FeatureFunction {
  public:
   MarkovJump(const std::string& param);