From 700b2abf48bf0a455064d6cf08754cbfd4e3a383 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Mon, 12 Sep 2011 19:22:59 +0100 Subject: source syntax features ~ blunsom emnlp 2008 --- decoder/ff_source_syntax.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 decoder/ff_source_syntax.h (limited to 'decoder/ff_source_syntax.h') diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h new file mode 100644 index 00000000..1e890736 --- /dev/null +++ b/decoder/ff_source_syntax.h @@ -0,0 +1,24 @@ +#ifndef _FF_SOURCE_TOOLS_H_ +#define _FF_SOURCE_TOOLS_H_ + +#include "ff.h" + +struct SourceSyntaxFeaturesImpl; + +class SourceSyntaxFeatures : public FeatureFunction { + public: + SourceSyntaxFeatures(const std::string& param); + ~SourceSyntaxFeatures(); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + SourceSyntaxFeaturesImpl* impl; +}; + +#endif -- cgit v1.2.3 From 10cfa1082059db646148af1884117082335a48e7 Mon Sep 17 00:00:00 2001 From: Chris Dyer Date: Sat, 17 Sep 2011 17:06:40 +0100 Subject: source span size features --- decoder/ff_source_syntax.cc | 62 +++++++++++++++++++++++++++++++++++++++++++++ decoder/ff_source_syntax.h | 17 +++++++++++++ 2 files changed, 79 insertions(+) (limited to 'decoder/ff_source_syntax.h') diff --git a/decoder/ff_source_syntax.cc b/decoder/ff_source_syntax.cc index ffe07f03..2df31c3a 100644 --- a/decoder/ff_source_syntax.cc +++ b/decoder/ff_source_syntax.cc @@ -157,3 +157,65 @@ void SourceSyntaxFeatures::PrepareForInput(const SentenceMetadata& smeta) { impl->InitializeGrids(smeta.GetSGMLValue("src_tree"), smeta.GetSourceLength()); } +struct SourceSpanSizeFeaturesImpl { + SourceSpanSizeFeaturesImpl() {} + + void InitializeGrids(unsigned src_len) { + fids.clear(); + fids.resize(src_len, src_len + 1); + } + + int FireFeatures(const TRule& rule, const int i, const int j, const WordID* ants, SparseVector* feats) { + int& fid = fids(i,j)[&rule]; + if (fid <= 0) { + ostringstream os; + os << "SSS:"; + unsigned ntc = 0; + for (unsigned k = 0; k < rule.f_.size(); ++k) { + if (k > 0) os << '_'; + int fj = rule.f_[k]; + if (fj <= 0) { + os << '[' << TD::Convert(-fj) << ants[ntc++] << ']'; + } else { + os << TD::Convert(fj); + } + } + fid = FD::Convert(os.str()); + } + if (fid > 0) + feats->set_value(fid, 1.0); + return SpanSizeTransform(j - i); + } + + mutable Array2D > fids; +}; + +SourceSpanSizeFeatures::SourceSpanSizeFeatures(const string& param) : + FeatureFunction(sizeof(char)) { + impl = new SourceSpanSizeFeaturesImpl; +} + +SourceSpanSizeFeatures::~SourceSpanSizeFeatures() { + delete impl; + impl = NULL; +} + +void SourceSpanSizeFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const { + int ants[8]; + for (unsigned i = 0; i < ant_contexts.size(); ++i) + ants[i] = *static_cast(ant_contexts[i]); + + *static_cast(context) = + impl->FireFeatures(*edge.rule_, edge.i_, edge.j_, ants, features); +} + +void SourceSpanSizeFeatures::PrepareForInput(const SentenceMetadata& smeta) { + impl->InitializeGrids(smeta.GetSourceLength()); +} + + diff --git a/decoder/ff_source_syntax.h b/decoder/ff_source_syntax.h index 1e890736..279563e1 100644 --- a/decoder/ff_source_syntax.h +++ b/decoder/ff_source_syntax.h @@ -21,4 +21,21 @@ class SourceSyntaxFeatures : public FeatureFunction { SourceSyntaxFeaturesImpl* impl; }; +struct SourceSpanSizeFeaturesImpl; +class SourceSpanSizeFeatures : public FeatureFunction { + public: + SourceSpanSizeFeatures(const std::string& param); + ~SourceSpanSizeFeatures(); + protected: + virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta, + const Hypergraph::Edge& edge, + const std::vector& ant_contexts, + SparseVector* features, + SparseVector* estimated_features, + void* context) const; + virtual void PrepareForInput(const SentenceMetadata& smeta); + private: + SourceSpanSizeFeaturesImpl* impl; +}; + #endif -- cgit v1.2.3