From f2fcf9e8aa0e5dee75fd08ee915488ec1a741975 Mon Sep 17 00:00:00 2001
From: Chris Dyer
Date: Mon, 16 Apr 2012 19:39:36 +0100
Subject: feature extraction helper

---
 rst_parser/arc_factored.cc | 21 +++++++++++++++++++++
 rst_parser/arc_factored.h  |  5 ++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'rst_parser')

# Reviewer notes. This region (after the diffstat, before the first
# "diff --git" line) lies outside every hunk.
#
# * Adds EdgeSubset::ExtractFeatures: the definition goes into
#   arc_factored.cc and the declaration into arc_factored.h.
# * For every entry of `ffs`, the new function calls EgdeFeatures once per
#   element of `h_m_pairs` (passing .first and .second) and once per element
#   of `roots`. For roots it passes -1 in the argument slot that receives
#   h_m_pairs[j].first for ordinary pairs.
# * A scratch vector `efmap` is cleared before each call and then added into
#   `*features`. `*features` is only accumulated into (+=) and is never
#   cleared here, so whatever the caller passes in is kept.
# * `features` is dereferenced without a null check.
# * The loops compare `int` indices against the result of `.size()`.
# * The forward declaration `struct ArcFeatureFunction;` moves from below
#   EdgeSubset to above it -- presumably so the new member declaration can
#   name that type; confirm against the upstream header.
# * NOTE(review): `EgdeFeatures` looks like a misspelling of "EdgeFeatures",
#   but it is declared outside this patch. Verify the declaration in
#   ArcFeatureFunction before renaming anything.
# * NOTE(review): several declarations read `std::vector >`,
#   `std::vector roots` or a bare `SparseVector`. The template argument lists
#   appear to have been lost before this text was captured -- TODO restore
#   them from the upstream repository before applying.
# * NOTE(review): leading indentation inside the hunks may not match the
#   upstream files byte-for-byte; check whitespace if the patch is rejected.

diff --git a/rst_parser/arc_factored.cc b/rst_parser/arc_factored.cc
index 44e769b8..34c689f4 100644
--- a/rst_parser/arc_factored.cc
+++ b/rst_parser/arc_factored.cc
@@ -12,6 +12,27 @@ using namespace std;
 using namespace std::tr1;
 using namespace boost;
 
+void EdgeSubset::ExtractFeatures(const TaggedSentence& sentence,
+                                 const std::vector >& ffs,
+                                 SparseVector* features) const {
+  SparseVector efmap;
+  for (int i = 0; i < ffs.size(); ++i) {
+    const ArcFeatureFunction& ff= *ffs[i];
+    for (int j = 0; j < h_m_pairs.size(); ++j) {
+      efmap.clear();
+      ff.EgdeFeatures(sentence, h_m_pairs[j].first,
+                                h_m_pairs[j].second,
+                                &efmap);
+      (*features) += efmap;
+    }
+    for (int j = 0; j < roots.size(); ++j) {
+      efmap.clear();
+      ff.EgdeFeatures(sentence, -1, roots[j], &efmap);
+      (*features) += efmap;
+    }
+  }
+}
+
 void ArcFactoredForest::ExtractFeatures(const TaggedSentence& sentence,
                                         const std::vector >& ffs) {
   for (int i = 0; i < ffs.size(); ++i) {
diff --git a/rst_parser/arc_factored.h b/rst_parser/arc_factored.h
index 4de38b66..a271c8d4 100644
--- a/rst_parser/arc_factored.h
+++ b/rst_parser/arc_factored.h
@@ -17,14 +17,17 @@ struct TaggedSentence {
   std::vector pos;
 };
 
+struct ArcFeatureFunction;
 struct EdgeSubset {
   EdgeSubset() {}
   std::vector roots; // unless multiroot trees are supported, this
                             // will have a single member
   std::vector > h_m_pairs; // h,m start at 0
+  void ExtractFeatures(const TaggedSentence& sentence,
+                       const std::vector >& ffs,
+                       SparseVector* features) const;
 };
 
-struct ArcFeatureFunction;
 class ArcFactoredForest {
  public:
   ArcFactoredForest() : num_words_() {}
-- 
cgit v1.2.3