From 9406dbc81fc1009fc3ab46191f535ec3d5a19d87 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Wed, 1 Jun 2011 01:26:55 -0400
Subject: rule bigram features

---
 decoder/ff_spans.cc | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 decoder/ff_spans.h  | 19 +++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/decoder/ff_spans.cc b/decoder/ff_spans.cc
index 89335682..e1da088d 100644
--- a/decoder/ff_spans.cc
+++ b/decoder/ff_spans.cc
@@ -182,6 +182,63 @@ void SpanFeatures::PrepareForInput(const SentenceMetadata& smeta) {
   }
 }
 
+// Constructs the feature function; `param` is accepted but not used.
+RuleNgramFeatures::RuleNgramFeatures(const std::string& param) {
+}
+
+// Empties the per-rule cache rule2_feats_
+// (std::map<const TRule*, SparseVector<double> >).
+void RuleNgramFeatures::PrepareForInput(const SentenceMetadata& smeta) {
+  rule2_feats_.clear();
+}
+
+// Adds the rule-bigram indicator features of edge.rule_ to *features.
+//
+// A rule's feature vector is built the first time the rule is seen and
+// memoized in rule2_feats_, keyed by the rule's address.  It holds one
+// "RB:<prev>_<cur>" feature per adjacent pair of symbols in rule.f_, plus a
+// leading pair (empty <prev>) and a trailing pair (empty <cur>).
+//
+// smeta, ant_contexts, estimated_features and context are not used.
+void RuleNgramFeatures::TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                         const Hypergraph::Edge& edge,
+                                         const vector<const void*>& ant_contexts,
+                                         SparseVector<double>* features,
+                                         SparseVector<double>* estimated_features,
+                                         void* context) const {
+  map<const TRule*, SparseVector<double> >::iterator it =
+      rule2_feats_.find(edge.rule_.get());
+  if (it == rule2_feats_.end()) {
+    const TRule& rule = *edge.rule_;
+    it = rule2_feats_.insert(make_pair(&rule, SparseVector<double>())).first;
+    SparseVector<double>& f = it->second;
+    // NOTE(review): the start/end boundary markers are empty strings, so the
+    // first and last features read "RB:_x" and "RB:x_"; confirm intended.
+    string prev = "";
+    for (int i = 0; i < rule.f_.size(); ++i) {
+      WordID w = rule.f_[i];
+      // Negative ids are folded onto their positive counterpart before the
+      // lookup (presumably these are nonterminals -- confirm).
+      if (w < 0) w = -w;
+      assert(w > 0);
+      const string& cur = TD::Convert(w);
+      ostringstream os;
+      os << "RB:" << prev << '_' << cur;
+      const int fid = FD::Convert(os.str());
+      // A non-positive id is not a usable feature.  Skip only this bigram:
+      // returning here would leave a half-built vector in the cache and make
+      // later traversals of the same rule score differently from this one.
+      if (fid > 0) f.add_value(fid, 1.0);
+      prev = cur;
+    }
+    ostringstream os;
+    os << "RB:" << prev << '_' << "";
+    const int last_fid = FD::Convert(os.str());
+    if (last_fid > 0) f.set_value(last_fid, 1.0);
+  }
+  (*features) += it->second;
+}
+
 inline bool IsArity2RuleReordered(const TRule& rule) {
   const vector<WordID>& e = rule.e_;
   for (int i = 0; i < e.size(); ++i) {
diff --git a/decoder/ff_spans.h b/decoder/ff_spans.h
index 24e0dede..b22c4d03 100644
--- a/decoder/ff_spans.h
+++ b/decoder/ff_spans.h
@@ -44,6 +44,25 @@ class SpanFeatures : public FeatureFunction {
   WordID oov_;
 };
 
+// Feature function that fires bigram indicator features over the symbols of
+// the rule attached to each hypergraph edge.
+class RuleNgramFeatures : public FeatureFunction {
+ public:
+  RuleNgramFeatures(const std::string& param);
+ protected:
+  virtual void TraversalFeaturesImpl(const SentenceMetadata& smeta,
+                                     const Hypergraph::Edge& edge,
+                                     const std::vector<const void*>& ant_contexts,
+                                     SparseVector<double>* features,
+                                     SparseVector<double>* estimated_features,
+                                     void* context) const;
+  virtual void PrepareForInput(const SentenceMetadata& smeta);
+ private:
+  // Per-rule feature vectors, filled lazily; mutable because it is updated
+  // from the const TraversalFeaturesImpl.
+  mutable std::map<const TRule*, SparseVector<double> > rule2_feats_;
+};
+
 class CMR2008ReorderingFeatures : public FeatureFunction {
  public:
   CMR2008ReorderingFeatures(const std::string& param);
-- 
cgit v1.2.3